# Environment setup ----
# The workspace is no longer wiped with rm(list = ls()): clearing the global
# environment from inside a script silently destroys objects in the caller's
# session. Restart R to get a clean session instead.

# Raise the size limit for globals exported to future workers.
# NOTE(review): 4000 * 1024^5 bytes is on the pebibyte scale, i.e. effectively
# unlimited; limits are usually written as N * 1024^2 (MiB) -- confirm intent.
options(future.globals.maxSize = 4000 * 1024^5)

# Install Seurat only when it is missing rather than re-installing every run.
if (!requireNamespace("Seurat", quietly = TRUE)) {
  install.packages("Seurat")
}
library(Seurat)
library(ggplot2)
library(sctransform)
library(purrr)

# Loading gene expression and antibody hashtag matrices
# The Read10X result is used as a list with a "Gene Expression" and an
# "Antibody Capture" element; each becomes its own Seurat object.
sc_dataA <- Read10X(data.dir = "/Users/k1773283/OneDrive - King's College London/RNAseq analysis - Manuscript/JR scRNAseq/L236A")
scA <- CreateSeuratObject(counts = sc_dataA$`Gene Expression`, project = "L236A", min.cells = 1, min.features = 100)
hashA <- CreateSeuratObject(counts = sc_dataA$`Antibody Capture`, project = "L236A", min.cells = 1, min.features = 1)

# https://satijalab.org/seurat/articles/hashing_vignette.html
# Cell barcodes present in both the gene expression and the hashtag objects.
# colnames() on a Seurat object returns its cell names, which avoids reaching
# into the @assays slot directly.
jointA <- intersect(colnames(scA), colnames(hashA))

# Subset RNA and HTO counts by joint cell barcodes
scA <- scA[, jointA]
hashA <- as.matrix(GetAssayData(object = hashA[, jointA], slot = "counts"))

# Confirm that the HTO have the correct names
rownames(hashA)

# Have a look at the matrix itself. The preview is capped at the number of
# available barcodes so it cannot fail with "subscript out of bounds" when
# fewer than 100 cells survive the filters above.
hashA[, seq_len(min(100, ncol(hashA))), drop = FALSE]

# Keep only the first two hashtags (the ones used in this sample); drop = FALSE
# guarantees the result stays a matrix.
hashA <- hashA[1:2, , drop = FALSE]

# Percentage of counts coming from features matching "^mt-", stored in the
# cell metadata under percent.mt
scA[["percent.mt"]] <- PercentageFeatureSet(scA, pattern = "^mt-")
scA@meta.data[["percent.mt"]]

# SCTransform normalisation, regressing out the mitochondrial percentage
scA <- SCTransform(
  scA,
  vars.to.regress = "percent.mt",
  verbose = FALSE
)

# Attach the hashtag counts as a separate "HTO" assay next to RNA
scA[["HTO"]] <- CreateAssayObject(counts = hashA)

# Centered log-ratio (CLR) normalisation of the HTO assay
scA <- NormalizeData(
  scA,
  assay = "HTO",
  normalization.method = "CLR"
)

# Check how many cells have 0 hashtag reads
# dim(hashA[,colSums(hashA)==0])

# How many cells have 0 RNA reads?
# scAmatrix <- as.matrix(GetAssayData(object = scA, slot = "counts"))
# dim(scAmatrix[,colSums(scAmatrix)==0])

# Measure of mean read depth (Mean counts per cell in the whole sample)
# mean(colSums(scAmatrix))

# Demultiplexing method. Set to very lenient settings as the antibody hashtags are very noisy
# Checking the demultiplexed sample shows it still gives correct sample allocations

# Classify every cell by hashtag on the HTO assay. Automatic thresholding is
# enabled and searches the quantile grid 0 to 0.9 in steps of 0.001.
# NOTE(review): with autoThresh = TRUE the fixed `quantile` is presumably not
# used -- confirm against the MULTIseqDemux documentation.
scA <- MULTIseqDemux(
  object = scA,
  assay = "HTO",
  autoThresh = TRUE,
  qrange = seq(0, 0.9, by = 0.001),
  maxiter = 10,
  quantile = 0.1,
  verbose = TRUE
)

# Then check how many cells in each category - adjust settings of above if required
# MULTI_ID holds the per-cell call; it is turned into a one-column matrix.
scAhashtagsmatrix <- as.matrix(scA$MULTI_ID)
unique(scAhashtagsmatrix)

# Cell count for every category (each hashtag, "Negative", "Doublet") at once.
# The previous code overwrote one variable six times in a row, so only the
# last category ("Doublet") was ever counted.
table(scAhashtagsmatrix[, 1])

# Kept for backward compatibility: the variable ends up holding the "Doublet"
# cells, exactly as it did after the original sequence of assignments.
filtered_scAhashtagmatrix <- as.matrix(scAhashtagsmatrix[scAhashtagsmatrix[, 1] == "Doublet", ])

nrow(filtered_scAhashtagmatrix)

# Then subset the seurat object based on the hashtag classification, giving the correct sample name
Idents(scA)

# Identities actually present after demultiplexing. subset() stops with an
# error when asked for an identity that does not exist, which happens for
# hashtags discarded earlier (only the first two HTO rows are kept), so each
# subset is only attempted when its hashtag is present.
hto_present <- unique(as.character(Idents(scA)))

if ("HTO-AHH01-TotalSeqA" %in% hto_present) {
  scA_h1 <- subset(scA, idents = "HTO-AHH01-TotalSeqA")
}
if ("HTO-AHH02-TotalSeqA" %in% hto_present) {
  scA_h2 <- subset(scA, idents = "HTO-AHH02-TotalSeqA")
}
# NOTE(review): these two are named scD_* but are taken from scA; presumably a
# leftover from processing sample D with this template -- confirm the names.
if ("HTO-AHH03-TotalSeqA" %in% hto_present) {
  scD_h3 <- subset(scA, idents = "HTO-AHH03-TotalSeqA")
}
if ("HTO-AHH04-TotalSeqA" %in% hto_present) {
  scD_h4 <- subset(scA, idents = "HTO-AHH04-TotalSeqA")
}

# Save the RDS under the correct sample name
# Example:
saveRDS(scA_h2, "scA_h2.rds")

# Now repeat for all samples scA-D

# DEG analysis between samples (i.e. between conditions)
# Following - https://satijalab.org/seurat/archive/v3.1/immune_alignment.html

library(Seurat)
# Install SeuratData from GitHub only when it is missing; devtools is needed
# solely for that installation, so it is installed on demand as well.
if (!requireNamespace("SeuratData", quietly = TRUE)) {
  if (!requireNamespace("devtools", quietly = TRUE)) {
    install.packages("devtools")
  }
  devtools::install_github('satijalab/seurat-data')
}
library(SeuratData)
library(cowplot)
library(patchwork)

# Just as demonstration
# InstallData("ifnb")
# data("ifnb")

# Read in the demultiplexed samples and set as correct conditions
# I always performed the analysis separately for the two tissues, hence the same variable names

# Suture samples: switch to the suture folder and load the five conditions
setwd("/Users/k1773283/OneDrive - King's College London/RNAseq analysis - Manuscript/JR scRNAseq/Suture")
youngALsut <- readRDS("scA_h2.rds")
adultIFsut <- readRDS("scB_h2.rds")
agedIFsut  <- readRDS("scC_h2.rds")
agedALsut  <- readRDS("scC_h4.rds")
adultALsut <- readRDS("scD_h4.rds")

# Periosteum samples: same five conditions, read from the periosteum folder.
# The working directory stays here for everything that follows.
setwd("/Users/k1773283/OneDrive - King's College London/RNAseq analysis - Manuscript/JR scRNAseq/Periosteum")
youngALperi <- readRDS("scA_h1.rds")
adultIFperi <- readRDS("scB_h1.rds")
agedIFperi  <- readRDS("scC_h1.rds")
agedALperi  <- readRDS("scC_h3.rds")
adultALperi <- readRDS("scD_h3.rds")

# Give each sample an identifier
# Helper: print the first metadata rows, copy the existing orig.ident into a
# new `original` column, then overwrite orig.ident with the condition label.
label_sample <- function(seurat_obj, label) {
  print(head(x = seurat_obj[[]]))
  seurat_obj$original <- seurat_obj$orig.ident
  seurat_obj$orig.ident <- label
  seurat_obj
}

# Suture
youngALsut <- label_sample(youngALsut, "youngALsut")
adultIFsut <- label_sample(adultIFsut, "adultIFsut")
agedIFsut  <- label_sample(agedIFsut, "agedIFsut")
agedALsut  <- label_sample(agedALsut, "agedALsut")
adultALsut <- label_sample(adultALsut, "adultALsut")

# Periosteum
youngALperi <- label_sample(youngALperi, "youngALperi")
adultALperi <- label_sample(adultALperi, "adultALperi")
adultIFperi <- label_sample(adultIFperi, "adultIFperi")
agedIFperi  <- label_sample(agedIFperi, "agedIFperi")
agedALperi  <- label_sample(agedALperi, "agedALperi")

# Merge the five samples of each tissue; cell barcodes are prefixed with the
# condition name so they stay unique across samples.
combinedsut <- merge(
  x = youngALsut,
  y = c(adultIFsut, agedIFsut, agedALsut, adultALsut),
  add.cell.ids = c("youngALsut", "adultIFsut", "agedIFsut", "agedALsut", "adultALsut")
)
combinedperi <- merge(
  x = youngALperi,
  y = c(adultIFperi, agedIFperi, agedALperi, adultALperi),
  add.cell.ids = c("youngALperi", "adultIFperi", "agedIFperi", "agedALperi", "adultALperi")
)

# Quick check of the merged metadata and of the condition labels present
head(x = combinedsut[[]])
unique(combinedsut$orig.ident)
unique(combinedperi$orig.ident)

# One object per condition, keyed by orig.ident
sut.list <- SplitObject(combinedsut, split.by = "orig.ident")
peri.list <- SplitObject(combinedperi, split.by = "orig.ident")

# Run SCTransform on every per-condition object, regressing out percent.mt.
# lapply() replaces the 1:length() index loops: it keeps the list names and
# is also safe for an empty list (1:length(x) would iterate over c(1, 0)).
sut.list <- lapply(
  sut.list,
  SCTransform,
  vars.to.regress = "percent.mt",
  verbose = TRUE
)
peri.list <- lapply(
  peri.list,
  SCTransform,
  vars.to.regress = "percent.mt",
  verbose = TRUE
)

# The peri.combined and sut.combined are the objects that I do the analysis on
# For each tissue: find integration anchors across the per-condition objects
# and integrate them, both using dimensions 1:20.
# NOTE(review): the list elements were normalised with SCTransform above, but
# normalization.method is left at its default here and there is no
# SelectIntegrationFeatures()/PrepSCTIntegration() step -- confirm this is
# intended rather than the SCT integration workflow.
sut.anchors <- FindIntegrationAnchors(object.list = sut.list, dims = 1:20)
sut.combined <- IntegrateData(anchorset = sut.anchors, dims = 1:20)

peri.anchors <- FindIntegrationAnchors(object.list = peri.list, dims = 1:20)
peri.combined <- IntegrateData(anchorset = peri.anchors, dims = 1:20)

# subset objects by CD45 expression (Ptprc) and Prrx1
# Cells with Ptprc < 0.5 in the 'data' slot are labelled cd45.neg, the rest
# cd45.pos; the resulting identities are then stored in the cd45 metadata.
# NOTE(review): the assay the expression is read from is whatever is the
# default at this point -- confirm it is the intended one.
Idents(peri.combined, WhichCells(object = peri.combined, expression = Ptprc < 0.5, slot = 'data')) <- 'cd45.neg'
Idents(peri.combined, WhichCells(object = peri.combined, expression = Ptprc >= 0.5, slot = 'data')) <- 'cd45.pos'
peri.combined$cd45 <- Idents(peri.combined)

# Same scheme for Prrx1 with a threshold of 1; this overwrites the active
# identities set above, which is why cd45 was saved to metadata first.
Idents(peri.combined, WhichCells(object = peri.combined, expression = Prrx1 < 1, slot = 'data')) <- 'prrx1.neg'
Idents(peri.combined, WhichCells(object = peri.combined, expression = Prrx1 >= 1, slot = 'data')) <- 'prrx1.pos'
peri.combined$prrx1 <- Idents(peri.combined)

# ===============================================================================================================================================================================
# ===============================================================================================================================================================================
# ===============================================================================================================================================================================
# Carmona suggested to split the overall sample into three: CD45-/Prrx1+ (osteo), CD45+/Prrx1- (immune), CD45-/Prrx1- (Fibro/endo), and I included the 4th: CD45+/Prrx1+ (overlap)

# Split on CD45 first: total CD45-negative and CD45-positive populations
Idents(peri.combined) <- peri.combined$cd45
peri.combined_cd45neg <- subset(peri.combined, idents = "cd45.neg")
peri.combined_cd45pos <- subset(peri.combined, idents = "cd45.pos")

# Within CD45-negative cells: osteo (C-/P+) and fibro/endo (C-/P-)
Idents(peri.combined_cd45neg) <- peri.combined_cd45neg$prrx1
peri.combined_cd45neg_prrx1pos <- subset(peri.combined_cd45neg, idents = "prrx1.pos")
peri.combined_cd45neg_prrx1neg <- subset(peri.combined_cd45neg, idents = "prrx1.neg")

# Within CD45-positive cells: immune (C+/P-) and overlap (C+/P+)
Idents(peri.combined_cd45pos) <- peri.combined_cd45pos$prrx1
peri.combined_cd45pos_prrx1neg <- subset(peri.combined_cd45pos, idents = "prrx1.neg")
peri.combined_cd45pos_prrx1pos <- subset(peri.combined_cd45pos, idents = "prrx1.pos")

# Write the four components back onto the full object as identities ...
Idents(peri.combined, WhichCells(peri.combined_cd45neg_prrx1neg)) <- 'fibro'
Idents(peri.combined, WhichCells(peri.combined_cd45neg_prrx1pos)) <- 'osteo'
Idents(peri.combined, WhichCells(peri.combined_cd45pos_prrx1neg)) <- 'immune'
Idents(peri.combined, WhichCells(peri.combined_cd45pos_prrx1pos)) <- 'overlap'

# ... and keep them in the `grouping` metadata column
peri.combined$grouping <- Idents(peri.combined)

# Counts the cells in each grouped condition
library(data.table)
library(magrittr)
# write.xlsx() is called below; openxlsx was previously only attached much
# later in the script, so it is attached here to keep this section runnable.
library(openxlsx)

## extract meta data
md <- peri.combined@meta.data %>% as.data.table
# the resulting md object has one "row" per cell
## count the number of cells per unique combination of "orig.ident" and "grouping"
md[, .N, by = c("orig.ident", "grouping")]
# reshape to one row per condition and one column per grouping
groupingcounts_bycondition <- md[, .N, by = c("orig.ident", "grouping")] %>% dcast(., orig.ident ~ grouping, value.var = "N")
class(groupingcounts_bycondition)
library(dplyr)

# Row total over all grouping columns (everything except orig.ident).
# The former `mutate(Total = rowSums())` call was removed: rowSums() without
# an argument always errors, and the total is computed here anyway.
groupingcounts_bycondition$TotalCells <- rowSums(groupingcounts_bycondition[, 2:ncol(groupingcounts_bycondition)], na.rm = TRUE)
groupingcounts_bycondition
# NOTE(review): showNA/Append look like arguments of the xlsx package rather
# than openxlsx -- confirm they have the intended effect.
write.xlsx(groupingcounts_bycondition, "perigroupingcounts.xlsx", sheetName = "Data", colNames = TRUE, rowNames = TRUE, showNA = FALSE, Append = TRUE)

# ===============================================================================================================================================================================
# ===============================================================================================================================================================================
# ===============================================================================================================================================================================


# Exploratory plotting / scratch section.
# NOTE(review): several objects used below (peri.combined.cd45.neg,
# sut.combined.cd45.neg, peri.combined.cd45.neg.prrx1.pos) are not assigned
# anywhere in the visible part of this script, sut.combined.cd45.neg2 is only
# assigned further down, and the "umap" reduction and `cluster` metadata are
# created later -- this section presumably relies on objects from an
# interactive session and will not run top-to-bottom as written.

# NOTE(review): cd45.neg identities are only set on peri.combined above, not
# on sut.combined -- confirm this subset is valid.
sut.combined.osteo <- subset(sut.combined, idents = "cd45.neg")

# Label cells with Prrx1 >= 1 (data slot) as prrx1.pos
Idents(peri.combined.cd45.neg, WhichCells(object = peri.combined.cd45.neg, expression = Prrx1 >= 1, slot = 'data')) <- 'prrx1.pos'

# UMAPs coloured by cluster / condition, for the whole object and subsets
DimPlot(subset(peri.combined,idents = "cd45.neg"), reduction = "umap", group.by = "cluster")
DimPlot(peri.combined, reduction = "umap", group.by = "cluster")
DimPlot(subset(peri.combined,idents = "prrx1.neg"), reduction = "umap", group.by = "orig.ident")

DimPlot(sut.combined, reduction = "umap", group.by = "cd45")

# Violin plots of marker expression per cluster
Idents(peri.combined.cd45.neg) <- peri.combined.cd45.neg$cluster
VlnPlot(peri.combined.cd45.neg, features = c("Ptprc","Prrx1"), pt.size = 0.6)

Idents(sut.combined.cd45.neg) <- sut.combined.cd45.neg$cluster
VlnPlot(sut.combined.cd45.neg2, features = c("Ptprc","Prrx1","Col1a1"), pt.size = 0.6)
VlnPlot(sut.combined, features = c("Alpl","Spp1"), pt.size = 0.6)

Idents(sut.combined) <- sut.combined$cluster

# Ptprc vs Prrx1 scatter, then young vs aged AL comparison in the
# CD45-/Prrx1+ periosteum object with identities set to the condition
FeatureScatter(sut.combined,"Ptprc","Prrx1")
DimPlot(peri.combined.cd45.neg.prrx1.pos, reduction = "umap", group.by = "orig.ident")
peri.combined.cd45.neg.prrx1.pos$orig.ident
Idents(peri.combined.cd45.neg.prrx1.pos) <- peri.combined.cd45.neg.prrx1.pos$orig.ident
FindMarkers(peri.combined.cd45.neg.prrx1.pos, ident.1 = "youngALperi", ident.2 = "agedALperi", verbose = TRUE)


# Heatmap of the two gating genes grouped by condition
DoHeatmap(peri.combined, features = c("Ptprc","Prrx1"), group.by="orig.ident")

# UMAPs of grouping vs condition, split both ways
DimPlot(peri.combined, reduction = "umap", group.by = "grouping",split.by = "orig.ident")
DimPlot(peri.combined, reduction = "umap", group.by = "orig.ident",split.by = "grouping")


# Subset objects by CD45 cluster expression
# NOTE(review): the subsets below select by cluster number, so the active
# identities must be the numeric clusters at this point -- confirm, since the
# statements above leave other labels as the active identity.
Idents(peri.combined)
# Keep the periosteum clusters listed here (the manually chosen CD45-negative set)
peri.combined.cd45.neg2 <- subset(peri.combined, idents = c("0","1","3","4","8","9","11","12","14","15","17"))
DimPlot(peri.combined.cd45.neg2, reduction = "umap", group.by = "cluster")
FeaturePlot(peri.combined.cd45.neg2, features = c("Ptprc","Prrx1"), pt.size = 0.2, ncol = 3)
# Replace cluster numbers by manual cell-type labels and store them in metadata
peri.combined.cd45.neg2 <- RenameIdents(object = peri.combined.cd45.neg2, `0` = "0Endothelial", `1` = "Fibroblast-SSC", `3` = "Osteo-SSC",`4` = "4Endothelial",`8` = "8Unknown",`9` = "9Unknown",`11` = "11Unknown",`12` = "12Proliferative",`14` = "14Unknown",`15` = "Neuromuscular",`17` = "17Unknown")
peri.combined.cd45.neg2$assignedcelltype <- Idents(peri.combined.cd45.neg2)
saveRDS(peri.combined.cd45.neg2, "periosteum_cd45negclusters.rds")

# Same procedure for suture: inspect, subset the chosen clusters, relabel
Idents(sut.combined)
DimPlot(sut.combined, reduction = "umap", label = TRUE,group.by = "cluster")
FeaturePlot(sut.combined, features = c("Ptprc","Mcam"), label=TRUE, pt.size = 0.2)
sut.combined.cd45.neg2 <- subset(sut.combined, idents = c("0","5","2","20","8","14","11","18","7","15","12","16"))
sut.combined.cd45.neg2 <- RenameIdents(object = sut.combined.cd45.neg2, `0` = "0Unknown", `2` = "2Unknown", `5` = "5Unknown",`7` = "7Proliferative",`8` = "8Unknown",`11` = "SSC",`12` = "12Unknown",`14` = "14Unknown",`15` = "15Unknown",`16` = "16Unknown",`18` = "Endothelial",`20` = "20Unknown")
sut.combined.cd45.neg2$assignedcelltype <- Idents(sut.combined.cd45.neg2)

# ================

# Work on the integrated assay for dimensionality reduction and clustering
DefaultAssay(sut.combined) <- "integrated"
DefaultAssay(peri.combined) <- "integrated"

# Run the standard workflow for visualization and clustering
sut.combined <- ScaleData(sut.combined, verbose = FALSE)
sut.combined <- RunPCA(sut.combined, npcs = 30, verbose = FALSE)

peri.combined <- ScaleData(peri.combined, verbose = FALSE)
peri.combined <- RunPCA(peri.combined, npcs = 30, verbose = FALSE)

# Elbow plots to judge how many PCs to use. They need the "pca" reduction, so
# they are drawn after RunPCA (they used to be called before it existed).
ElbowPlot(sut.combined)
ElbowPlot(peri.combined)

# UMAP and clustering on the first 20 PCs
sut.combined <- RunUMAP(sut.combined, reduction = "pca", dims = 1:20)
sut.combined <- FindNeighbors(sut.combined, reduction = "pca", dims = 1:20)
sut.combined <- FindClusters(sut.combined, resolution = 0.5)

peri.combined <- RunUMAP(peri.combined, reduction = "pca", dims = 1:20)
peri.combined <- FindNeighbors(peri.combined, reduction = "pca", dims = 1:20)
peri.combined <- FindClusters(peri.combined, resolution = 0.5)

# Visualization
library(SeuratData)
library(ggplot2)

# Suture: UMAP by condition and by active identity side by side, then one
# panel per condition
p1 <- DimPlot(sut.combined, reduction = "umap", group.by = "orig.ident")
p2 <- DimPlot(sut.combined, reduction = "umap", label = FALSE)
plot_grid(p1, p2)
DimPlot(sut.combined, reduction = "umap", split.by = "orig.ident")

# Periosteum: same layout. The two plots are now stored as p3/p4; previously
# plot_grid(p3, p4) referred to objects that were never assigned.
p3 <- DimPlot(peri.combined, reduction = "umap", group.by = "orig.ident")
p4 <- DimPlot(peri.combined, reduction = "umap", label = TRUE)
plot_grid(p3, p4)
DimPlot(peri.combined, reduction = "umap", split.by = "orig.ident")

# Identify conserved genes by cluster
# Marker detection is run on the RNA assay rather than the integrated one
DefaultAssay(sut.combined) <- "RNA"
DefaultAssay(peri.combined) <- "RNA"

# Dependencies for the marker step, installed only when missing so that
# re-running the script does not reinstall them each time.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages('BiocManager')
}
if (!requireNamespace("multtest", quietly = TRUE)) {
  BiocManager::install('multtest')
}
if (!requireNamespace("metap", quietly = TRUE)) {
  install.packages('metap')
}
library(metap)
library(multtest)
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}
# NOTE(review): assumes the GitHub repository installs a package named MetaDE
if (!requireNamespace("MetaDE", quietly = TRUE)) {
  remotes::install_github("metaOmics/MetaDE")
}
library(Seurat)


library(openxlsx)

# Finding markers for each cluster, separated for speed of re-running where needed
# NOTE(review): some clusters are tested on sut.combined and others on
# sut.combined.osteo -- confirm this mix is intended.
Idents(sut.combined) <- sut.combined$cluster
c0markers <- FindConservedMarkers(sut.combined, ident.1 = 0, grouping.var = "orig.ident", verbose = TRUE)
c1markers <- FindConservedMarkers(sut.combined, ident.1 = 1, grouping.var = "orig.ident", verbose = TRUE)
c2markers <- FindConservedMarkers(sut.combined.osteo, ident.1 = 2, grouping.var = "orig.ident", verbose = TRUE)
c3markers <- FindConservedMarkers(sut.combined.osteo, ident.1 = 3, grouping.var = "orig.ident", verbose = TRUE)
c4markers <- FindConservedMarkers(sut.combined.osteo, ident.1 = 4, grouping.var = "orig.ident", verbose = TRUE)
c5markers <- FindConservedMarkers(sut.combined, ident.1 = 5, grouping.var = "orig.ident", verbose = TRUE)
c6markers <- FindConservedMarkers(sut.combined, ident.1 = 6, grouping.var = "orig.ident", verbose = TRUE)
c7markers <- FindConservedMarkers(sut.combined.osteo, ident.1 = 7, grouping.var = "orig.ident", verbose = TRUE)
c8markers <- FindConservedMarkers(sut.combined, ident.1 = 8, grouping.var = "orig.ident", verbose = TRUE)
c9markers <- FindConservedMarkers(sut.combined, ident.1 = 9, grouping.var = "orig.ident", verbose = TRUE)
c10markers <- FindConservedMarkers(sut.combined, ident.1 = 10, grouping.var = "orig.ident", verbose = TRUE)
c11markers <- FindConservedMarkers(sut.combined.osteo, ident.1 = 11, grouping.var = "orig.ident", verbose = TRUE)
c12markers <- FindConservedMarkers(sut.combined.osteo, ident.1 = 12, grouping.var = "orig.ident", verbose = TRUE)
c13markers <- FindConservedMarkers(sut.combined, ident.1 = 13, grouping.var = "orig.ident", verbose = TRUE)
c14markers <- FindConservedMarkers(sut.combined, ident.1 = 14, grouping.var = "orig.ident", verbose = TRUE)
c15markers <- FindConservedMarkers(sut.combined.osteo, ident.1 = 15, grouping.var = "orig.ident", verbose = TRUE)
c16markers <- FindConservedMarkers(sut.combined, ident.1 = 16, grouping.var = "orig.ident", verbose = TRUE)
c17markers <- FindConservedMarkers(sut.combined, ident.1 = 17, grouping.var = "orig.ident", verbose = TRUE)
c18markers <- FindConservedMarkers(sut.combined, ident.1 = 18, grouping.var = "orig.ident", verbose = TRUE)
c19markers <- FindConservedMarkers(sut.combined, ident.1 = 19, grouping.var = "orig.ident", verbose = TRUE)
c20markers <- FindConservedMarkers(sut.combined, ident.1 = 20, grouping.var = "orig.ident", verbose = TRUE)

# Inspect and export the cluster 11 markers. This used to sit above the
# marker computation, i.e. it read c11markers before it was created.
head(c11markers, n = 10)
write.xlsx(c11markers, "suture c11markers.xlsx", sheetName = "Data", colNames = TRUE, rowNames = TRUE, showNA = FALSE, Append = TRUE)

# Chose not to rename idents, but left this as notes of manual cluster cell types.
# sut.combined <- RenameIdents(sut.combined, `0` = "Pre B Cell", `1` = "Myeloid", `2` = "APC/B Cell", 
#                                 `3` = "Neutrophil", `4` = "Monocytes", `5` = "B Cell", `6` = "M2 Macrophage", `7` = "Proliferation?", `8` = "8", `9` = "T Cell", 
#                                 `10` = "M1 Macrophage", `11` = "SSC", `12` = "Erythrocytes", `13` = "Gran-mono progenitors", `14` = "Pre-B Cell", `15` = "Mast Cell",
#                                 `16` = "Erythocytes2", `17` = "Macrophage", `18` = "Vascular Endothelial", `19` = "Unknown", `20` = "20 Cell")
# 

# Store the current periosteum identities as the `cluster` metadata column
Idents(peri.combined)
peri.combined$cluster <- Idents(peri.combined)
Idents(peri.combined) <- peri.combined$cluster

# Repeating markers and manual identification for periosteum
# Dependencies are installed only when missing
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages('BiocManager')
}
if (!requireNamespace("multtest", quietly = TRUE)) {
  BiocManager::install('multtest')
}
if (!requireNamespace("metap", quietly = TRUE)) {
  install.packages('metap')
}

# NOTE(review): peri.combined.osteo (clusters 8, 11, 15) is not assigned in
# the visible part of this script -- confirm where it comes from.
c0pmarkers <- FindConservedMarkers(peri.combined, ident.1 = 0, grouping.var = "orig.ident", verbose = TRUE)
c1pmarkers <- FindConservedMarkers(peri.combined, ident.1 = 1, grouping.var = "orig.ident", verbose = TRUE)
c2pmarkers <- FindConservedMarkers(peri.combined, ident.1 = 2, grouping.var = "orig.ident", verbose = TRUE)
c3pmarkers <- FindConservedMarkers(peri.combined, ident.1 = 3, grouping.var = "orig.ident", verbose = TRUE)
c4pmarkers <- FindConservedMarkers(peri.combined, ident.1 = 4, grouping.var = "orig.ident", verbose = TRUE)
c5pmarkers <- FindConservedMarkers(peri.combined, ident.1 = 5, grouping.var = "orig.ident", verbose = TRUE)
c6pmarkers <- FindConservedMarkers(peri.combined, ident.1 = 6, grouping.var = "orig.ident", verbose = TRUE)
c7pmarkers <- FindConservedMarkers(peri.combined, ident.1 = 7, grouping.var = "orig.ident", verbose = TRUE)
c8pmarkers <- FindConservedMarkers(peri.combined.osteo, ident.1 = 8, grouping.var = "orig.ident", verbose = TRUE)
c9pmarkers <- FindConservedMarkers(peri.combined, ident.1 = 9, grouping.var = "orig.ident", verbose = TRUE)
c10pmarkers <- FindConservedMarkers(peri.combined, ident.1 = 10, grouping.var = "orig.ident", verbose = TRUE)
c11pmarkers <- FindConservedMarkers(peri.combined.osteo, ident.1 = 11, grouping.var = "orig.ident", verbose = TRUE)
c12pmarkers <- FindConservedMarkers(peri.combined, ident.1 = 12, grouping.var = "orig.ident", verbose = TRUE)
c13pmarkers <- FindConservedMarkers(peri.combined, ident.1 = 13, grouping.var = "orig.ident", verbose = TRUE)
c14pmarkers <- FindConservedMarkers(peri.combined, ident.1 = 14, grouping.var = "orig.ident", verbose = TRUE)
c15pmarkers <- FindConservedMarkers(peri.combined.osteo, ident.1 = 15, grouping.var = "orig.ident", verbose = TRUE)
c16pmarkers <- FindConservedMarkers(peri.combined, ident.1 = 16, grouping.var = "orig.ident", verbose = TRUE)
c17pmarkers <- FindConservedMarkers(peri.combined, ident.1 = 17, grouping.var = "orig.ident", verbose = TRUE)

# Inspect / export results. Both statements used to run before the marker
# tables they read had been computed.
head(c16pmarkers, n = 10)
write.xlsx(c3pmarkers, "periosteum_SSC-3-markers.xlsx", sheetName = "Data", colNames = TRUE, rowNames = TRUE, showNA = FALSE, Append = TRUE)

# peri.combined <- RenameIdents(peri.combined, `0` = "Endothelial", `1` = "Fibroblast/MSC", `2` = "B Cell", 
#                                 `3` = "MSC/Osteo", `4` = "Vascular Endothelial", `5` = "Pre-B Cell", `6` = "Neutrophil", `7` = "Macrophage", `8` = "Low Ribosomal", `9` = "Dying", 
#                                 `10` = "T Cell", `11` = "X", `12` = "Mitotic", `13` = "Macrophage", `14` = "Erythrocyte", `15` = "Neuromuscular",
#                                 `16` = "Myeloid", `17` = "Mast Cell")
# 

# Manually assign cell type names then subset the object accordingly

# NOTE(review): peritestnames and peritest are not assigned in the visible
# part of this script -- confirm which object they were copied from.
Idents(peri.combined.osteo)

# Start from the numeric cluster_new identities, rename them to manual cell
# types and store the labels straight away. Previously the identities were
# reset to cluster_new BEFORE JRmanualcelltype was stored, so the column just
# duplicated cluster_new and the manual labels were lost. The order now
# matches the suture section below.
Idents(peritestnames) <- peritestnames$cluster_new
peritestnames <- RenameIdents(peritestnames, `0` = "Osteoblast Progenitor", `1` = "Osteoblast Progenitor", `2` = "Osteoblast Progenitor",
                                `3` = "Endothelial Progenitor", `4` = "Pre Osteoblast", `5` = "Osteoblast", `6` = "Pre Osteoblast", `7` = "B Cell", `8` = "Pre B Cell", `9` = "Endothelial Progenitor",
                                `10` = "SSC", `11` = "Erythroid", `12` = "Macrophage", `13` = "Endothelial Progenitor", `14` = "B Cell", `15` = "Macrophage")
peritestnames$JRmanualcelltype <- Idents(peritestnames)
Idents(peritestnames)

# Note that these 2 are not the same!!!!
peritestnames$cluster
peritestnames$cluster_new

peritest$cluster_new <- Idents(peritest)

# Back to numeric clusters to select the non-immune clusters by number
Idents(peritestnames) <- peritestnames$cluster_new
peri.combined.osteo2 <- subset(peritestnames, idents = c("0","1","2","3","4","5","6","9","10","13"))
saveRDS(peri.combined.osteo2, "periosteum_manualosteoonly")

# Same for suture, select only non-immune clusters

# Rename the numeric cluster_new identities to manual cell-type labels
Idents(sut.combined.osteo) <- sut.combined.osteo$cluster_new
sut.combined.osteo <- RenameIdents(
  sut.combined.osteo,
  `0` = "B Cell",
  `1` = "B Cell",
  `2` = "Red Blood Cell",
  `3` = "Low Expression",
  `4` = "Dying",
  `5` = "B Cell",
  `6` = "B Cell",
  `7` = "Erythrocyte",
  `8` = "Osteo Progenitor",
  `9` = "Macrophage",
  `10` = "SSC",
  `11` = "Fibroblast",
  `12` = "T Cell",
  `13` = "Macrophage",
  `14` = "Haemo Progenitor",
  `15` = "Haemo Progenitor",
  `16` = "Haemo Progenitor"
)

# Keep the labels in metadata and print both columns for comparison
sut.combined.osteo$JRmanualcelltype <- Idents(sut.combined.osteo)
sut.combined.osteo$JRmanualcelltype
sut.combined.osteo$cluster_new

# Switch back to cluster numbers and keep clusters 8, 10 and 11 only
Idents(sut.combined.osteo) <- sut.combined.osteo$cluster_new
sut.combined.osteo2 <- subset(sut.combined.osteo, idents = c("8", "10", "11"))
saveRDS(sut.combined.osteo2, "suture_manualosteoonly")

DimPlot(peri.combined, group.by = "grouping")

# SAVE THE SEURAT FILES
# Written to the current working directory, without a file extension
saveRDS(sut.combined, file = "sutureallsamplescombined")
saveRDS(peri.combined, file = "periosteumallsamplescombined")

# NOTE(review): the objects are read back from a different directory than the
# one they were just saved to -- presumably a second machine; confirm.
setwd("D:/Users/zeiss/Desktop/singlecell")

sut.combined <- readRDS("sutureallsamplescombined")
peri.combined <- readRDS("periosteumallsamplescombined")

# Marker panel shown on the periosteum embedding, three panels per row
plotmarkers <- c(
  "Col1a1", "Prrx1", "Nes", "Thy1",
  "Mcam", "Ptprc", "Cdh13", "S100a4"
)

FeaturePlot(peri.combined, features = plotmarkers, pt.size = 0.2, ncol = 3)

library(data.table)
library(magrittr)

## extract meta data
md <- sut.combined@meta.data %>% as.data.table
# the resulting md object has one "row" per cell
## count the number of cells per unique combination of "orig.ident" and "cluster"
md[, .N, by = c("orig.ident", "cluster")]
# reshape to one row per condition and one column per cluster
clustercounts_bycondition <- md[, .N, by = c("orig.ident", "cluster")] %>% dcast(., orig.ident ~ cluster, value.var = "N")
class(clustercounts_bycondition)
library(dplyr)

# Row total over all cluster columns (everything except orig.ident).
# The former `mutate(Total = rowSums())` call was removed: rowSums() without
# an argument always errors, and the total is computed here anyway.
clustercounts_bycondition$TotalCells <- rowSums(clustercounts_bycondition[, 2:ncol(clustercounts_bycondition)], na.rm = TRUE)
clustercounts_bycondition
write.xlsx(clustercounts_bycondition, "sutureclustercounts.xlsx", sheetName = "Data", colNames = TRUE, rowNames = TRUE, showNA = FALSE, Append = TRUE)


# We can explore these marker genes for each cluster and use them to annotate our clusters as specific cell types.
# FeaturePlot(immune.combined, features = c("CD3D", "SELL", "CREM", "CD8A", "GNLY", "CD79A", "FCGR3A", 
#                                           "CCL2", "PPBP"), min.cutoff = "q9")

# DimPlot(immune.combined, label = TRUE)

# Save the cluster numbers as cluster ident, and make new ident for cluster_sample
# Periosteum osteo subset: identities become the manual cell type, and a
# combined "<celltype>_<condition>" label is stored in metadata
Idents(peri.combined.osteo2) <- peri.combined.osteo2$JRmanualcelltype
peri.combined.osteo2$JRmanualcelltype_sample <- paste(
  Idents(peri.combined.osteo2),
  peri.combined.osteo2$orig.ident,
  sep = "_"
)

# Full periosteum object: keep the active identities as `cluster`
peri.combined$orig.ident
peri.combined$cluster <- Idents(peri.combined)

# Suture: keep the active identities as `cluster` and build the combined
# "<cluster>_<condition>" label
Idents(sut.combined)
sut.combined$cluster <- Idents(sut.combined)
sut.combined$cluster_sample <- paste(
  Idents(sut.combined),
  sut.combined$orig.ident,
  sep = "_"
)

# Set the active idents in the format cluster_samplename e.g. 11_agedIFsut
Idents(sut.combined) <- "cluster_sample"
Idents(peri.combined.osteo2) <- "JRmanualcelltype_sample"
unique(peri.combined.osteo2$JRmanualcelltype_sample)

# Then run this to find differentially expressed genes between 2 samples.
DefaultAssay(peri.combined.osteo2) <- 'SCT'
# PrepSCTFindMarkers() returns the prepared object; the result has to be
# assigned back, otherwise the call has no effect on the FindMarkers() runs
# below (it was previously discarded).
peri.combined.osteo2 <- PrepSCTFindMarkers(peri.combined.osteo2, assay = "SCT", verbose = TRUE)

# Young vs aged AL within the "Endothelial Progenitor" cell type (SCT assay)
Idents(peri.combined.osteo2) <- "JRmanualcelltype_sample"
whichgenesdiffer <- FindMarkers(peri.combined.osteo2,ident.1 = "Endothelial Progenitor_youngALperi", ident.2 = "Endothelial Progenitor_agedALperi", verbose = TRUE, assay = "SCT", recorrect_umi = FALSE)

# Young vs aged AL across all cells of the subset (RNA assay); this replaces
# the result above.
# NOTE(review): the RNA assay does not appear to be log-normalised anywhere in
# the visible script -- confirm before interpreting RNA-based DE results.
Idents(peri.combined.osteo2) <- "orig.ident"
whichgenesdiffer <- FindMarkers(peri.combined.osteo2,ident.1 = "youngALperi", ident.2 = "agedALperi", verbose = TRUE, assay = "RNA")
whichgenesdiffer$rownames <- rownames(whichgenesdiffer)

# Aged IF vs aged AL within the "Endothelial Progenitor" cell type (SCT assay)
Idents(peri.combined.osteo2) <- "JRmanualcelltype_sample"
whichgenesdiffer2 <- FindMarkers(peri.combined.osteo2,ident.1 = "Endothelial Progenitor_agedIFperi", ident.2 = "Endothelial Progenitor_agedALperi", verbose = TRUE, assay = "SCT", recorrect_umi = FALSE)

# https://genomebiology.biomedcentral.com/articles/10.1186/s13059-021-02451-7#Sec12
# https://github.com/satijalab/seurat/issues/5321 
# https://satijalab.org/seurat/archive/v3.0/sctransform_vignette.html

# This suggests to use SCT scale.data to do DE testing.

# Two comparisons against agedALperi on the RNA assay ('data' slot):
# youngALperi vs agedALperi and agedIFperi vs agedALperi
Idents(peri.combined.osteo2) <- "orig.ident"
whichgenesdiffer <- FindMarkers(peri.combined.osteo2, slot = "data", ident.1 = "youngALperi", ident.2 = "agedALperi", verbose = TRUE, assay = "RNA", recorrect_umi = FALSE)
whichgenesdiffer$rownames <- rownames(whichgenesdiffer)
whichgenesdiffer2 <- FindMarkers(peri.combined.osteo2, slot = "data", ident.1 = "agedIFperi", ident.2 = "agedALperi", verbose = TRUE, assay = "RNA", recorrect_umi = FALSE)
whichgenesdiffer2$rownames <- rownames(whichgenesdiffer2)

# Genes reported in both comparisons (statistics taken from the first one)
intersecting_genes <- whichgenesdiffer[whichgenesdiffer$rownames %in% whichgenesdiffer2$rownames,]
intersecting_genelist <- rownames(intersecting_genes)
write.xlsx(intersecting_genes, "intersecting_genes.xlsx", sheetName = "Data", colNames = TRUE, rowNames = TRUE, showNA = FALSE, Append = TRUE)

# Genes with a positive fold change in both comparisons. The fold-change
# column is avg_log2FC, the same column the later Prrx1+/CD45- section uses;
# the previous `avg_diff` column is absent from these results, which made
# both filters return empty tables.
pos_whichgenesdiffer <- whichgenesdiffer[whichgenesdiffer$avg_log2FC > 0,]
pos_whichgenesdiffer2 <- whichgenesdiffer2[whichgenesdiffer2$avg_log2FC > 0,]
pos_intersecting_genes <- pos_whichgenesdiffer[pos_whichgenesdiffer$rownames %in% pos_whichgenesdiffer2$rownames,]
# Gene list of the positive intersection (was taken from intersecting_genes)
pos_intersecting_genelist <- rownames(pos_intersecting_genes)
write.xlsx(pos_intersecting_genes, "pos_intersecting_genes.xlsx", sheetName = "Data", colNames = TRUE, rowNames = TRUE, showNA = FALSE, Append = TRUE)


# DEGs shared between comparisons within the Prrx1+ CD45- periosteal cells
# peri.combined_cd45neg_prrx1pos <- SCTransform(peri.combined_cd45neg_prrx1pos, vars.to.regress = "percent.mt", verbose = TRUE, return.only.var.genes = F)

# Comparison 1: youngALperi vs agedALperi
Idents(peri.combined_cd45neg_prrx1pos) <- "orig.ident"
whichgenesdiffer <- FindMarkers(
  peri.combined_cd45neg_prrx1pos,
  slot = "data",
  ident.1 = "youngALperi",
  ident.2 = "agedALperi",
  verbose = TRUE,
  assay = "RNA",
  recorrect_umi = FALSE
)
whichgenesdiffer$rownames <- rownames(whichgenesdiffer)

# Comparison 2: agedIFperi vs agedALperi
Idents(peri.combined_cd45neg_prrx1pos) <- "orig.ident"
whichgenesdiffer2 <- FindMarkers(
  peri.combined_cd45neg_prrx1pos,
  slot = "data",
  ident.1 = "agedIFperi",
  ident.2 = "agedALperi",
  verbose = TRUE,
  assay = "RNA",
  recorrect_umi = FALSE
)
whichgenesdiffer2$rownames <- rownames(whichgenesdiffer2)

# Keep genes with an unadjusted p-value below 0.05 in each comparison.
# NOTE(review): this filters on raw `p_val`, not `p_val_adj` - confirm intended.
sig_whichgenesdiffer <- whichgenesdiffer[with(whichgenesdiffer, p_val < 0.05), ]
sig_whichgenesdiffer2 <- whichgenesdiffer2[with(whichgenesdiffer2, p_val < 0.05), ]

# Genes passing the filter in both comparisons (rows taken from comparison 1)
intersecting_genes <- sig_whichgenesdiffer[
  is.element(sig_whichgenesdiffer$rownames, sig_whichgenesdiffer2$rownames),
]
intersecting_genelist <- rownames(intersecting_genes)
write.xlsx(intersecting_genes, "intersecting_genes.xlsx",
           sheetName = "Data", colNames = TRUE, rowNames = TRUE, showNA = FALSE, Append = TRUE)
write.xlsx(whichgenesdiffer2, "whichgenesdiffer2.xlsx",
           sheetName = "Data", colNames = TRUE, rowNames = TRUE, showNA = FALSE, Append = TRUE)
write.xlsx(whichgenesdiffer, "whichgenesdiffer.xlsx",
           sheetName = "Data", colNames = TRUE, rowNames = TRUE, showNA = FALSE, Append = TRUE)

# Same again, restricted to genes with a positive log2 fold change
pos_whichgenesdiffer <- whichgenesdiffer[with(whichgenesdiffer, avg_log2FC > 0), ]
pos_whichgenesdiffer2 <- whichgenesdiffer2[with(whichgenesdiffer2, avg_log2FC > 0), ]

sigpos_whichgenesdiffer <- pos_whichgenesdiffer[with(pos_whichgenesdiffer, p_val < 0.05), ]
sigpos_whichgenesdiffer2 <- pos_whichgenesdiffer2[with(pos_whichgenesdiffer2, p_val < 0.05), ]

pos_intersecting_genes <- sigpos_whichgenesdiffer[
  is.element(sigpos_whichgenesdiffer$rownames, sigpos_whichgenesdiffer2$rownames),
]

write.xlsx(pos_intersecting_genes, "pos_intersecting_genes.xlsx",
           sheetName = "Data", colNames = TRUE, rowNames = TRUE, showNA = FALSE, Append = TRUE)
write.xlsx(pos_whichgenesdiffer, "pos_whichgenesdiffer.xlsx",
           sheetName = "Data", colNames = TRUE, rowNames = TRUE, showNA = FALSE, Append = TRUE)
write.xlsx(pos_whichgenesdiffer2, "pos_whichgenesdiffer2.xlsx",
           sheetName = "Data", colNames = TRUE, rowNames = TRUE, showNA = FALSE, Append = TRUE)

# Comparison 3: adultALperi vs agedALperi (computed but not exported here)
whichgenesdiffer3 <- FindMarkers(
  peri.combined_cd45neg_prrx1pos,
  slot = "data",
  ident.1 = "adultALperi",
  ident.2 = "agedALperi",
  verbose = TRUE,
  assay = "RNA"
)

# Every feature name in the RNA assay, exported as a background gene set
background_genes <- rownames(peri.combined_cd45neg_prrx1pos[["RNA"]]@meta.features)
write.xlsx(as.data.frame(background_genes), "background_genes.xlsx",
           sheetName = "Data", colNames = TRUE, rowNames = TRUE, showNA = FALSE, Append = TRUE)


# Re-run SCTransform on the osteo subset, starting from the RNA assay.
# The result is assigned back to `peri.combined.osteo2`; the original line wrote
# to a differently-capitalised name (`Peri.combined.osteo2`) that nothing reads,
# so the transform was effectively discarded.
DefaultAssay(peri.combined.osteo2) <- "RNA"
peri.combined.osteo2 <- SCTransform(peri.combined.osteo2, vars.to.regress = "percent.mt", verbose = TRUE, return.only.var.genes = FALSE)

# Log-normalise the RNA assay as well. NormalizeData returns the updated object,
# so it has to be assigned; the trailing empty argument has also been removed.
DefaultAssay(peri.combined.osteo2) <- "RNA"
peri.combined.osteo2 <- NormalizeData(
  peri.combined.osteo2,
  normalization.method = "LogNormalize",
  scale.factor = 10000,
  margin = 1,
  verbose = TRUE
)

# A form of GO analysis

# install.packages(dnet)
# dEnricher(genelist, identity = symbol, check.symbol.identity = FALSE, 
          # genome = Mm, ontology = GOBP, sizeRange = c(10, 1000), min.overlap = 3, which_distance = NULL, 
          # test = FisherTest, p.adjust.method = BH, ontology.algorithm = none, verbose = T, RData.location = "https://github.com/hfang-bristol/RDataCentre/blob/master/dnet/1.0.7")

# Scatter of pct.1 against pct.2 for each row of IFgenes, labelled by row name
ggplot(data = IFgenes, mapping = aes(x = pct.2, y = pct.1)) +
  geom_point() +
  geom_text(label = rownames(IFgenes))

# Generate pseudobulk (average expression per identity class) and probe it for
# genes of interest. The current identities determine the averaged groups.
Idents(sut.combined)
sut.cluster.averages <- AverageExpression(sut.combined)
sut.cluster.averages

# Known genes of interest, saved as lists
nad_genelist <- c("Bst1","Slc12a8","Nampt","Nmnat1","Nmnat2","Nmnat3","Naprt","Nmrk1","Nmrk2","Cd38","Sirt1","Sirt2","Sirt3","Sirt4","Sirt5","Sirt6","Sirt7","Parp1","Parp2","Parp3","Parp4","Parp6","Parp8","Parp9","Parp10","Parp11","Parp12","Parp14","Sarm1")
autoph_genelist <- c("Becn1","Pik3c3","Atg9a","Rb1cc1","Atg13","Ulk1","Ulk2","Atg3","Atg5","Atg7","Atg12","Atg16l1","Atg4b","Atg4c","Map1lc3b","Gabarap","Wdr45")
# The trailing empty-string placeholders were removed: "" is not a gene symbol
# and yields NA rows when the list is used to index a DEG table.
wnt_genelist <- c("Porcn","Wnt1","Wnt2","Wnt2b","Wnt3","Wnt3a","Wnt4","Wnt5a","Wnt6","Wnt7a","Wnt7b","Wnt8a","Wnt8b","Wnt9a","Wnt9b","Wnt10a","Wnt10b","Wnt11","Wnt16","Cer1","Notum","Wif1","Serpinf1","Sost","Dkk1","Dkk2","Dkk4","Sfrp1","Sfrp2","Sfrp4","Sfrp5","Rspo1","Rspo2","Rspo3","Rspo4","Lgr4","Lgr5","Lgr6","Rnf43","Znrf3","Fzd1","Fzd2","Fzd3","Fzd4","Fzd5","Fzd6","Fzd7","Fzd8","Fzd9","Fzd10","Lrp5","Lrp6","Bambi","Csnk1e","Dvl1","Dvl2","Dvl3")

# Pull the NAD genes for selected cluster_sample columns of the SCT averages.
# The column names below only exist when the identities are cluster_sample labels.
cluster_list <- c("11_agedIFsut", "11_agedALsut","11_youngALsut")
s1 <- sut.cluster.averages[["SCT"]][,]
s2 <- subset(s1, rownames(s1) %in% nad_genelist)
s3 <- s2[,cluster_list]



# Pseudobulk for periosteum: average expression per current identity class
Idents(peri.combined)
peri.cluster.averages <- AverageExpression(peri.combined)

# Peek at the first rows of the SCT averages
head(peri.cluster.averages[["SCT"]][1:5, ])

# Write both pseudobulk tables to Excel workbooks
library(openxlsx)
write.xlsx(
  sut.cluster.averages, "pseudobulk suture.xlsx",
  sheetName = "Suture", colNames = TRUE, rowNames = TRUE, showNA = FALSE, Append = TRUE
)
write.xlsx(
  peri.cluster.averages, "pseudobulk periosteum.xlsx",
  sheetName = "Periosteum", colNames = TRUE, rowNames = TRUE, showNA = FALSE, Append = TRUE
)

# Violin plots of individual known marker genes, grouped by the current identities
library(ggplot2)

# M2 macrophage markers
VlnPlot(
  sut.combined,
  features = c("Cd68", "Cd163", "Mrc1", "Egr2"),
  pt.size = 0.6
)

# Endothelial markers
VlnPlot(
  sut.combined.osteo,
  features = c("Tek", "Cdh5", "Emcn", "Pecam1", "Cd34", "Kdr", "Prom1"),
  pt.size = 0.6
)

# Treg markers
VlnPlot(
  sut.combined,
  features = c("Foxp3", "Il2ra", "Ctla4", "Tnfrsf18", "Lag3", "Il7r", "Gzmb", "Klrg1", "Ccr4", "Il10"),
  pt.size = 0.6
)
# From here on the suture plots are grouped by cluster
Idents(sut.combined) <- sut.combined$cluster

# SSC markers
VlnPlot(
  sut.combined,
  features = c("Lepr", "Prrx1", "Cd200", "Thy1", "Ctsk", "Acta2", "Cdh13", "Plxna2", "Gli1", "Mcam", "Pdgfrb"),
  pt.size = 0.6
)
VlnPlot(
  sut.combined.osteo,
  features = c("Ngfr", "Eng", "Lepr", "Prrx1", "Cd200", "Thy1", "Ctsk", "Acta2", "Cdh13", "Plxna2", "Gli1", "Mcam", "Pdgfrb"),
  pt.size = 0.6
)

# HSC markers
VlnPlot(
  sut.combined.osteo,
  features = c("Ly6a", "Cd27", "Cd34", "Cd38", "Spn", "Cd48", "Kit", "Slamf1"),
  pt.size = 0.6
)

# ISCT minimal MSC markers (in vitro/FACS)
VlnPlot(
  sut.combined.osteo,
  features = c("Cd14", "Ptprc", "Cd79a", "Thy1", "Eng", "Nt5e"),
  pt.size = 0.6
)

# Tissue-resident fibroblast markers https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5079827/
VlnPlot(
  sut.combined.osteo,
  features = c("Adam12", "Postn", "Gli1", "Fsp1", "Col1a1", "Col1a2", "Tcf12", "Tcf4", "Nes"),
  pt.size = 0.6
)


# Fibroblast markers
VlnPlot(
  sut.combined.osteo,
  features = c("Vim", "Loxl1", "Lum", "S100a4", "Fbln1", "Fbln2", "Serpinh1"),
  pt.size = 0.6
)

# B cell markers
VlnPlot(
  sut.combined.osteo,
  features = c("Ms4a1", "Cd79a"),
  pt.size = 0.6
)
FeaturePlot(
  sut.combined.osteo,
  features = c("Cd79a", "Prrx1"),
  label = TRUE
)

# Classical monocyte markers
VlnPlot(
  peri.combined,
  features = c("Ccl6", "Fn1", "Ccr2", "Itgam"),
  pt.size = 0.6
)

# Atypical monocyte markers
# Gene symbols corrected: "Clqc" -> "C1qc" (digit one, not letter l) and
# "Fgcr4" -> "Fcgr4" (matching Fcgr1/Fcgr2b/Fcgr3 used elsewhere in this file).
# The misspelled symbols are not present in the data, so those panels were dropped.
VlnPlot(peri.combined, features = c("C1qc","Fcgr4","Itgal","Cx3cr1","Klf1"), pt.size = 0.6)

# Classical and Atypical monocyte markers
VlnPlot(peri.combined, features = c("Ccl6","Fn1","Ccr2","Itgam","C1qc","Fcgr4","Itgal","Cx3cr1","Klf1"), pt.size = 0.6)

# Macrophage markers
VlnPlot(
  sut.combined.osteo,
  features = c("Cd74", "Ly6a", "Stat1", "H2-Aa", "Isg15", "Saa3", "Cxcl9"),
  pt.size = 0.6
)

# Are 'immune cells' actually fibroblasts?
# Looking at """fibroblast-specific""" markers, many clusters are positive
VlnPlot(
  sut.combined.osteo,
  features = c("Ccr2", "S100a4", "Vim", "Acta2", "Serpinh1"),
  pt.size = 0.6
)

# However there is overlap with
# Macrophage markers
VlnPlot(
  peri.combined,
  features = c("Ptprc", "Adgre1", "Itgax", "Itgam"),
  pt.size = 0.6
)
# Genetic (macrophage-cre) markers
VlnPlot(
  sut.combined.osteo,
  features = c("Lyz2", "Cx3cr1", "Itgam", "Csf1r"),
  pt.size = 0.6
)
# General macrophage markers
VlnPlot(
  peri.combined.osteo,
  features = c("Pla2g7", "Sparc", "Cd14", "Cd68", "Fcgr3", "Ccl3", "Ccl4"),
  pt.size = 0.6
)

# Overlap of macrophage and fibroblast highlighted in Peri clusters 7,13,16 here
VlnPlot(
  peri.combined,
  features = c("S100a4", "Lyz2"),
  pt.size = 0.6
)

# M1 markers
VlnPlot(
  peri.combined,
  features = c("Marco", "Nos2", "Fpr2", "Fcgr1", "Fcgr2b", "Fcgr3", "Cd80", "Cd86"),
  pt.size = 0.6
)

# M2 markers https://www.frontiersin.org/articles/10.3389/fimmu.2019.01084/full#supplementary-material 
VlnPlot(
  peri.combined,
  features = c("Arg1", "Mgl2", "Tmem26", "Rnase2a", "Mrc1", "Egr2", "Flt1", "Chil3"),
  pt.size = 0.6
)

# M1 markers https://www.frontiersin.org/articles/10.3389/fimmu.2019.01084/full#supplementary-material 
# NOTE(review): "42795" looks like a gene symbol that a spreadsheet converted to
# a date serial number - confirm the intended symbol against the source table.
VlnPlot(
  peri.combined,
  features = c("Wipi1", "Mlx", "42795", "Gdi1", "Zdhhc6", "Stam", "Prag1", "Coro1a"),
  pt.size = 0.6
)

# Osteoclast markers
VlnPlot(
  peri.combined.osteo,
  features = c("Ctsk", "Calcr", "Mmp9", "Acp5", "Car2", "Snca", "Trim10"),
  pt.size = 0.6
)
VlnPlot(
  sut.combined.osteo,
  features = c("Ctsk", "Calcr", "Mmp9", "Acp5", "Car2"),
  pt.size = 0.6
)

# Neuro markers
VlnPlot(
  sut.combined.osteo,
  features = c("Calca", "Vip", "Mmp9", "Acp5", "Car2"),
  pt.size = 0.6
)

# Osteo and Wnt targets
VlnPlot(
  sut.combined.osteo,
  features = c("Col1a1", "Col1a2", "Postn", "Procr", "Ibsp", "Alpl", "Mme", "Slc44a1", "Ror2", "Spp1", "Bglap", "Sp7", "Lepr", "Prrx1", "Cd200", "Thy1", "Ctsk", "Acta2", "Cdh13", "Plxna2", "Gli1", "Mcam", "Pdgfrb"),
  pt.size = 0.6
)
VlnPlot(
  peri.combined,
  features = c("Col3a1", "Prrx1"),
  pt.size = 0.6
)

# Neutrophil markers, regenerative
VlnPlot(
  peri.combined,
  features = c("Ly6g", "Cxcr4", "Itga4", "Flt1", "Chil3", "Arg1", "Il10"),
  pt.size = 0.6
)
VlnPlot(
  sut.combined,
  features = c("Cxcr4", "Itga4", "Flt1", "Chil3", "Arg1", "Il10"),
  pt.size = 0.6
)
VlnPlot(peri.combined, features = c("Ly6g"), pt.size = 0.6)
VlnPlot(sut.combined, features = c("Ly6g"), pt.size = 0.6)
# Pairwise co-expression within selected identity classes
FeatureScatter(
  subset(peri.combined, idents = c("0", "1", "3", "9", "15")),
  "Prrx1", "Thy1"
)
FeatureScatter(peri.combined, "Mcam", "Cdh13")
FeatureScatter(
  subset(sut.combined, idents = c("3", "4", "13", "19")),
  "Tnf", "Cxcr4"
)
FeatureScatter(
  subset(peri.combined, idents = "4"),
  "Pecam1", "Mcam"
)

# Osteoblast markers
VlnPlot(
  sut.combined.osteo,
  features = c("Col1a1", "Col1a2", "Postn", "Alpl", "Ror2", "Spp1", "Bglap", "Bglap2", "Runx2"),
  pt.size = 0.6
)

# Stem markers
VlnPlot(
  peri.combined.osteo,
  features = c("Procr", "Axin2", "Sp5", "Lgr4", "Klf4", "Nanog", "Pou5f1", "Zfp42", "Sox2", "Lgr5", "Wnt10a", "Wnt10b"),
  pt.size = 0.6
)

# Wnts - which are the niche cells?
VlnPlot(
  peri.combined,
  features = c("Porcn", "Wnt1", "Wnt2", "Wnt2b", "Wnt3", "Wnt3a", "Wnt4", "Wnt5a", "Wnt6", "Wnt7a", "Wnt7b", "Wnt8a", "Wnt8b", "Wnt9a", "Wnt9b", "Wnt10a", "Wnt10b", "Wnt11", "Wnt16"),
  pt.size = 0.6
)
VlnPlot(sut.combined, features = c("Porcn"), pt.size = 0.6)

# Pairwise co-expression scatter plots for selected periosteum identities
Idents(peri.combined)
FeatureScatter(subset(peri.combined, idents = "6"), "Tnf", "Cxcr4")
FeatureScatter(peri.combined, "Ctsk", "Acp5")
FeatureScatter(subset(peri.combined, idents = c("7","15")), "S100a4", "Adgre1")

# Check average cluster expression for multiple markers simultaneously
# Idents should be cluster number only
Idents(sut.combined)
sut.cluster.averages_allsamples <- AverageExpression(sut.combined)
# Then run heatmap
# NOTE(review): "Soc2" may be a typo for "Socs2" - confirm; symbols absent from
# the averaged matrix are silently left out of the heatmap by the %in% filter.
includelist<- c("Arg1","Mgl2","Tmem26","Rnase2a","Mrc1","Egr2","Flt1","Chil3","Clec10a","Atp6v0d2","Fam198b","Matk","Soc2","Itgb3","Ocstamp")
heatmapdata <- subset(sut.cluster.averages_allsamples$integrated, rownames(sut.cluster.averages_allsamples$integrated) %in% includelist)
# Close the current graphics device only if one is open; an unconditional
# dev.off() stops the script with an error when only the null device exists.
if (dev.cur() > 1) dev.off()
# heatmap() requires a numeric matrix, so coerce in case a data frame was returned.
heatmap(as.matrix(heatmapdata), Rowv=NA, Colv=NA, col = heat.colors(256),  margins=c(5,5))

sut.combined$orig.ident

# DE tests on cluster_sample identities: cluster 15 young vs aged AL in both
# directions, then identity "9" against identity "15".
Idents(peri.combined) <- peri.combined$cluster_sample
test1 <- FindMarkers(
  peri.combined,
  ident.1 = "15_youngALperi",
  ident.2 = "15_agedALperi",
  verbose = TRUE
)
test2 <- FindMarkers(
  peri.combined,
  ident.1 = "15_agedALperi",
  ident.2 = "15_youngALperi",
  verbose = TRUE
)
test3 <- FindMarkers(
  peri.combined,
  ident.1 = "9",
  ident.2 = "15",
  verbose = TRUE
)

#========================================================================================================================================
# UCell - https://carmonalab.github.io/UCell_demo/UCell_Seurat_vignette.html
# Generates a numerical value for combined expression of multiple genes in given gene list (i.e. GO-term or manually curated)

# Install only what is missing, so re-running the script does not reinstall
# UCell (and hit the network) every time.
if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
if (!requireNamespace("UCell", quietly = TRUE)) BiocManager::install("UCell")
library(Seurat)
library(UCell)
# set.seed(123)

# Reference UMAP of the CD45- periosteum subset, labelled by cluster
DimPlot(object = peri.combined.osteo, group.by = "cluster", label = TRUE,
        label.size = 3, repel = TRUE)

# Load required lists from GO terms
# http://www.informatics.jax.org/vocab/gene_ontology/

# Download the MGI report for one GO term and return the second column of the
# sheet as a list (the column this script has always used as the gene list).
fetch_go_signature <- function(go_id) {
  report_url <- paste0(
    "http://www.informatics.jax.org/go/report.xlsx?goID=", go_id,
    "&results=10000&startIndex=0&sort=term&dir="
  )
  signaturegenelist <- read.xlsx(report_url)
  as.list(signaturegenelist[, 2])
}

# Signature name -> GO term. Each term is downloaded exactly once (the NAD
# biosynthesis report used to be fetched four times in a row).
go_terms <- c(
  nadbiosyntheticprocess = "GO:0009435",
  positiveregulationofinflammatoryresponse = "GO:0050729",
  negativeregulationofinflammatoryresponse = "GO:0050728",
  wntreceptoractivity = "GO:0042813",
  naddependenthistonedeacetylaseactivity = "GO:0017136",
  negativeregulationofwntsignalingpathway = "GO:0030178",
  positiveregulationofwntsignalingpathway = "GO:0030177",
  unfoldedproteinresponse = "GO:0006986",
  positiveregulationofautophagy = "GO:0010508",
  negativeregulationofautophagy = "GO:0010507",
  positiveregulationofcellcycle = "GO:0045787",
  negativeregulationofcellcycle = "GO:0045786",
  positiveregulationofnucleotidemetabolicprocess = "GO:0045981",
  negativeregulationofnucleotidemetabolicprocess = "GO:0045980",
  wntsecretion = "GO:0061355",
  # Second batch. These used to follow a `markers <- list()` reset that threw
  # away every signature above, although later plots request several of them
  # by name and index signature.names beyond eight entries. The reset is
  # removed; the first 15 names keep their positions in signature.names.
  ossification = "GO:0001503",
  bloodvesseldev = "GO:0001568",
  nadhdehydcomplex = "GO:0010257",
  wntsigpathway = "GO:0016055",
  canonwntsig = "GO:0060070",
  cellcellwntsig = "GO:0198738",
  mapkcascade = "GO:0000165",
  nadhtoubiquinone = "GO:0006120"
)

# Named list of signatures; lapply keeps the names of go_terms.
markers <- lapply(go_terms, fetch_go_signature)

# Metadata column names that AddModuleScore_UCell creates for each signature
signature.names <- paste0(names(markers), "_UCell")

# Run the function to calculate the scores - takes longer for more tests
DefaultAssay(peri.combined_cd45neg_prrx1pos) <- "SCT"
peri.combined <- AddModuleScore_UCell(peri.combined, features = markers, maxRank = 30000)
sut.combined <- AddModuleScore_UCell(sut.combined, features = markers)

peri.combined.osteo2 <- AddModuleScore_UCell(peri.combined.osteo2, features = markers)

# Find which genelist you want to look at here, or remove square brackets to view all
as.matrix(signature.names)

# Plot entire samples split by condition
VlnPlot(sut.combined, features = "Prrx1", group.by = "cluster")
VlnPlot(peri.combined, features = signature.names[5], group.by = "orig.ident")


# With several features VlnPlot returns a patchwork of panels. `&` adds the
# boxplot layer to every panel, whereas `+` only modified the last one.
# NOTE(review): this object is not scored in this section - its *_UCell
# columns must already exist for the plot to work.
VlnPlot(peri.combined_cd45neg_prrx1pos, features = signature.names, group.by = "orig.ident") & geom_boxplot()

# Install RColorBrewer only when it is missing, rather than on every run.
if (!requireNamespace("RColorBrewer", quietly = TRUE)) install.packages("RColorBrewer")
library(RColorBrewer)
# UMAPs coloured by every signature score, on a reversed RdBu palette
FeaturePlot(peri.combined, reduction = "umap", features = signature.names, ncol = 3,
            order = TRUE) & scale_colour_gradientn(colours = rev(brewer.pal(n = 11, name = "RdBu")))

# Selected signatures, one column of panels per sample
FeaturePlot(peri.combined, reduction = "umap", features = signature.names[c(1,6,9,14)], split.by = "orig.ident",ncol = 3,
            order = TRUE) & scale_colour_gradientn(colours = rev(brewer.pal(n = 11, name = "RdBu")))

signature.names

# Reorder plots
# peri.combinedtest$orig.ident <- factor(peri.combined$orig.ident, levels = c("youngALperi","adultALperi","adultIFperi","agedALperi","agedIFperi"))

Idents(peri.combined)

# Or, plot only certain types of cells, given by cluster number, and split by condition. NB: INVERT can be used to test other cells
Idents(peri.combined_cd45neg_prrx1pos) <- peri.combined_cd45neg_prrx1pos$orig.ident
# Two alternative inputs; the second assignment wins when run top to bottom.
seuratobjforUCell <- subset(sut.combined, idents = c("11"), invert = TRUE)
seuratobjforUCell <- peri.combined_cd45neg_prrx1pos

# dplyr is attached before the first pipe/verb below. library() fails loudly if
# the package is missing, whereas require() only returns FALSE.
library(dplyr)

## count the number of cells per unique combinations of "orig.ident" and "clusters"
# as.data.table is namespace-qualified so it works without attaching data.table.
md <- seuratobjforUCell@meta.data %>% data.table::as.data.table()
md[, .N, by = c("orig.ident")]

# Extract data using this
df <- seuratobjforUCell@meta.data[,endsWith(colnames(seuratobjforUCell@meta.data),"_UCell")]
df$cell_id <- row.names(df) 
# condition = cell id with everything from the first underscore onwards removed
df <- df %>%
  mutate(condition = gsub('_.*','',cell_id))
seuratobjforUCell <- AddModuleScore_UCell(seuratobjforUCell, features = markers, slot = "scale.data", assay = "SCT", maxRank = 30000)

# Find the columns containing signatures
# NOTE(review): columns 36:43 are a hard-coded position range - check them
# against colnames(df) before trusting this preview.
colnames(df)
head(df[,c(36:43)])

# Remove empty signature columns if required
# Two alternative definitions of df; the second overwrites the first when both run.
df <- seuratobjforUCell@meta.data[,!startsWith(colnames(seuratobjforUCell@meta.data),"signature")]
df <- seuratobjforUCell@meta.data[,endsWith(colnames(seuratobjforUCell@meta.data),"_UCell")]
# RECHECK THE HEAD AS NEEDED

# Collects one data frame per exported signature, keyed by sheet name
output_list <- list()

# Shorten a signature column name so it fits in an Excel sheet title.
#
# sheettitle: character vector of signature names (e.g. "..._UCell" columns).
# Returns the same vector with "positiveregulationof" -> "up_",
# "negativeregulationof" -> "dn_", the "_UCell" suffix removed and
# "signalling"/"signaling" -> "sig". The result is now returned visibly;
# previously it was only returned through a trailing assignment.
shortensheettitle <- function(sheettitle) {
  replacements <- c(
    "positiveregulationof" = "up_",
    "negativeregulationof" = "dn_",
    "_UCell" = "",
    "signalling" = "sig",
    "signaling" = "sig"
  )
  # Applied in order; the patterns are literal text, hence fixed = TRUE.
  for (pattern in names(replacements)) {
    sheettitle <- gsub(pattern, replacements[[pattern]], sheettitle, fixed = TRUE)
  }
  sheettitle
}
# Export every UCell signature as its own sheet, with one column per condition.
# The signature columns are located by name in the object's metadata, so the
# loop no longer depends on hard-coded positions (36:43) or on `df` happening
# to have orig.ident as its first column.
ucell_meta <- seuratobjforUCell@meta.data
signature_cols <- which(endsWith(colnames(ucell_meta), "_UCell"))
for (i in signature_cols)
{
sheettitle <- colnames(ucell_meta)[i]
sheettitle_short <- shortensheettitle(sheettitle)
df1 <- ucell_meta[, c("orig.ident", sheettitle)]
# Reorganise dataframe to have orig.ident (condition) as the column.
# `row` numbers the cells within each condition so pivot_wider can line the
# values up; it is dropped again afterwards.
df2 <- df1 %>%
  group_by(orig.ident) %>%
  mutate(row = row_number()) %>%
  ungroup() %>%
  tidyr::pivot_wider(names_from = orig.ident, values_from = all_of(sheettitle)) %>%
  select(-row)
# Excel sheet names are limited to 31 characters; the metadata column index is
# prefixed to the shortened name before truncating.
nam <-  substr(paste(i, sheettitle_short, sep = "_"),1,31)
# Also keep a copy under that name in the workspace, as before.
assign(nam, df2)
output_list[[nam]] <- df2
}

if (length(output_list) > 0) {
  write.xlsx(output_list,"output_signatures.xlsx")
} else {
  warning("No *_UCell columns found in seuratobjforUCell; nothing exported", call. = FALSE)
}

# Inspect score / annotation columns
# NOTE(review): `inflammation_UCell` is not produced by the GO signature list in
# this section - presumably created elsewhere; confirm it exists.
peri.combined$positiveregulationofinflammatoryresponse_UCell
peri.combined$inflammation_UCell
peri.combined$customclassif

# Install RColorBrewer only when it is missing, rather than on every run.
if (!requireNamespace("RColorBrewer", quietly = TRUE)) install.packages("RColorBrewer")
library(RColorBrewer)
FeaturePlot(peri.combined.osteo2, reduction = "umap", features = c("wntreceptoractivity_UCell","positiveregulationofwntsignalingpathway_UCell","naddependenthistonedeacetylaseactivity_UCell"), split.by = "orig.ident",
            ncol = 2, order = TRUE, label = TRUE) & scale_colour_gradientn(colours = rev(brewer.pal(n = 11, name = "RdBu")))

Idents(peri.combined.osteo2) <- peri.combined.osteo2$JRmanualcelltype

FeaturePlot(peri.combined, reduction = "umap", features = c("cluster"), ncol = 2, order = TRUE) 

# Side-by-side panels: clusters, UCell scores and scType labels
DimPlot(peri.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = 'cluster')+ NoLegend()+FeaturePlot(peri.combined, reduction = "umap", features = "inflammation_UCell")+FeaturePlot(peri.combined, reduction = "umap", features = "positiveregulationofinflammatoryresponse_UCell")+DimPlot(peri.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = 'customclassif') + NoLegend()
DimPlot(sut.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = 'cluster')+ NoLegend()+FeaturePlot(sut.combined, reduction = "umap", features = "positiveregulationofinflammatoryresponse_UCell")+DimPlot(sut.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = 'customclassif') + NoLegend()

Idents(peri.combined) <- "cluster"
DimPlot(peri.combined, reduction = "umap", label = TRUE, split.by = "orig.ident")

# ==============================================================================================
# Automated identification of cells with scType
# https://github.com/IanevskiAleksandr/sc-type/blob/master/README.md

# load libraries and functions
# Install HGNChelper only when it is missing, rather than on every run.
if (!requireNamespace("HGNChelper", quietly = TRUE)) install.packages("HGNChelper")
library("HGNChelper")
lapply(c("dplyr","Seurat","HGNChelper"), library, character.only = TRUE)

# NOTE(review): the two source() calls below execute code fetched from the
# master branch at run time. Consider pinning a commit or vendoring the files
# so results are reproducible and the executed code is reviewed.

# load gene set preparation function
source("https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/R/gene_sets_prepare.R")

# load cell type annotation function
source("https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/R/sctype_score_.R")

# DB file
db_ <- "https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/ScTypeDB_full.xlsx"
tissue <- "Immune system" # e.g. Immune system,Pancreas,Liver,Eye,Kidney,Brain,Lung,Adrenal,Heart,Intestine,Muscle,Placenta,Spleen,Stomach,Thymus 

# prepare gene sets
gs_list <- gene_sets_prepare(db_, tissue)

# Subset seurat object as required and pass to scType
Idents(peri.combined) <- peri.combined$cd45
# seuratobjforscType <- subset(peri.combined, idents = "cd45.pos")
seuratobjforscType <- sut.combined

# CD45-negative periosteum subset (relies on the cd45 identities set above)
peri.combined.osteo <- subset(peri.combined, idents = "cd45.neg")


# get cell-type by cell matrix
es.max <- sctype_score(scRNAseqData = seuratobjforscType[["integrated"]]@scale.data, scaled = TRUE, 
                       gs = gs_list$gs_positive, gs2 = gs_list$gs_negative)

# NOTE: scRNAseqData parameter should correspond to your input scRNA-seq matrix. 
# In case Seurat is used, it is either pbmc[["RNA"]]@scale.data (default), pbmc[["SCT"]]@scale.data, in case sctransform is used for normalization,
# or pbmc[["integrated"]]@scale.data, in case a joint analysis of multiple single-cell datasets is performed.

# merge by cluster: sum each cell type's score over the cells of a cluster and
# keep the ten highest-scoring types per cluster
cL_resutls <- do.call("rbind", lapply(unique(seuratobjforscType@meta.data$seurat_clusters), function(cl){
  cluster_cells <- rownames(seuratobjforscType@meta.data[seuratobjforscType@meta.data$seurat_clusters==cl, ])
  # drop = FALSE keeps a one-column matrix for single-cell clusters; without it
  # the subset collapses to a vector and rowSums() fails.
  es.max.cl <- sort(rowSums(es.max[, cluster_cells, drop = FALSE]), decreasing = TRUE)
  head(data.frame(cluster = cl, type = names(es.max.cl), scores = es.max.cl, ncells = sum(seuratobjforscType@meta.data$seurat_clusters==cl)), 10)
}))

# best-scoring type per cluster
sctype_scores <- cL_resutls %>% group_by(cluster) %>% top_n(n = 1, wt = scores)  

# set low-confident (low ScType score) clusters to "unknown"
# (score below a quarter of the number of cells in the cluster)
sctype_scores$type[as.numeric(as.character(sctype_scores$scores)) < sctype_scores$ncells/4] <- "Unknown"
print(sctype_scores[,1:3])

# write the per-cluster label onto every cell of that cluster
seuratobjforscType@meta.data$customclassif <- ""
for(j in unique(sctype_scores$cluster)){
  cl_type <- sctype_scores[sctype_scores$cluster==j,]
  seuratobjforscType@meta.data$customclassif[seuratobjforscType@meta.data$seurat_clusters == j] <- as.character(cl_type$type[1])
}

# Rename the object to its correct name
sut.combined <- seuratobjforscType

# Periosteum UMAPs: scType label, cluster, sample, CD45 status and Prrx1 status
plot1 <- DimPlot(
  peri.combined,
  reduction = "umap", label = TRUE, repel = TRUE,
  group.by = 'customclassif'
) + NoLegend()
plot2 <- DimPlot(
  peri.combined,
  reduction = "umap", label = TRUE, repel = TRUE,
  group.by = 'cluster'
) + NoLegend()
plot3 <- DimPlot(
  peri.combined,
  reduction = "umap", label = TRUE, repel = TRUE,
  group.by = 'orig.ident'
)
plot4 <- DimPlot(
  peri.combined,
  reduction = "umap", label = TRUE, repel = TRUE,
  group.by = 'cd45'
) + NoLegend()
plot5 <- DimPlot(
  peri.combined,
  reduction = "umap", label = TRUE, repel = TRUE,
  group.by = 'prrx1'
) + NoLegend()

plot1 + plot2 + plot3 + plot4 + plot5

# The same five views for suture
plot6 <- DimPlot(
  sut.combined,
  reduction = "umap", label = TRUE, repel = TRUE,
  group.by = 'customclassif'
) + NoLegend()
plot7 <- DimPlot(
  sut.combined,
  reduction = "umap", label = TRUE, repel = TRUE,
  group.by = 'cluster'
) + NoLegend()
plot8 <- DimPlot(
  sut.combined,
  reduction = "umap", label = TRUE, repel = TRUE,
  group.by = 'orig.ident'
)
plot9 <- DimPlot(
  sut.combined,
  reduction = "umap", label = TRUE, repel = TRUE,
  group.by = 'cd45'
) + NoLegend()
plot10 <- DimPlot(
  sut.combined,
  reduction = "umap", label = TRUE, repel = TRUE,
  group.by = 'prrx1'
) + NoLegend()

plot6 + plot7 + plot8 + plot9 + plot10

# peri.prrx1 UMAP with one panel per sample
DimPlot(
  peri.prrx1,
  reduction = "umap", label = TRUE, repel = TRUE,
  split.by = 'orig.ident'
) + NoLegend()


# ==============================================================================================



# Expression of the gene(s) of interest across suture samples
genesofinterest <- c("Nampt")
# With split.by the result is a patchwork of panels; `&` rotates the axis text
# on every panel, whereas `+` only affected the last one.
FeaturePlot(sut.combined, features = genesofinterest, split.by = "orig.ident", ncol=4) & RotatedAxis()

# Group the following plots by sample
Idents(sut.combined) <- sut.combined$orig.ident

DimPlot(sut.combined,
        label = TRUE, 
        split.by = "orig.ident")

RidgePlot(sut.combined, features = genesofinterest, ncol = 2)

# Aged IF vs aged AL within individual periosteum clusters (cluster_sample identities)
Idents(peri.combined) <- peri.combined$cluster_sample

degs0 <- FindMarkers(
  peri.combined,
  ident.1 = "0_agedIFperi",
  ident.2 = "0_agedALperi"
)
degs1 <- FindMarkers(
  peri.combined,
  ident.1 = "1_agedIFperi",
  ident.2 = "1_agedALperi"
)
degs3 <- FindMarkers(
  peri.combined,
  ident.1 = "3_agedIFperi",
  ident.2 = "3_agedALperi"
)
degs9 <- FindMarkers(
  peri.combined,
  ident.1 = "9_agedIFperi",
  ident.2 = "9_agedALperi"
)
degs15 <- FindMarkers(
  peri.combined,
  ident.1 = "15_agedIFperi",
  ident.2 = "15_agedALperi"
)
degs8 <- FindMarkers(
  peri.combined,
  ident.1 = "8_agedIFperi",
  ident.2 = "8_agedALperi"
)


# Whole-sample comparison in suture: young AL vs aged AL
sutDEGs_youngALvsagedAL <- FindMarkers(
  sut.combined,
  ident.1 = "youngALsut",
  ident.2 = "agedALsut"
)
# sutDEGs_youngALvsadultAL <- FindMarkers(sut.combined, ident.1 = "youngALsut", ident.2 = "adultALsut")
# sutDEGs_adultALvsagedAL <- FindMarkers(sut.combined, ident.1 = "adultALsut", ident.2 = "agedALsut")
# sutDEGs_agedIFvsagedAL <- FindMarkers(sut.combined, ident.1 = "agedIFsut", ident.2 = "agedALsut")
# sutDEGs_adultIFvsadultAL <- FindMarkers(sut.combined, ident.1 = "adultIFsut", ident.2 = "adultALsut")
# sutDEGs_youngALvsagedIF <- FindMarkers(sut.combined, ident.1 = "youngALsut", ident.2 = "agedIFsut")
# 
# Idents(sut.combined) <- sut.combined$cluster_sample
# c11sutDEGs_agedIFvsagedAL <- FindMarkers(sut.combined, ident.1 = "11_agedIFsut", ident.2 = "11_agedALsut")
# c11sutDEGs_agedIFvsagedAL[nad_genelist,]
# c11sutDEGs_agedIFvsagedAL[autoph_genelist,]
# c11sutDEGs_agedIFvsagedAL[wnt_genelist,]
# c11sutDEGs_agedIFvsagedAL["Cebpb",]
# 
# c11sutDEGs_youngALvsagedAL[nad_genelist,]
# 
# c11sutDEGs_adultALvsagedAL <- FindMarkers(sut.combined, ident.1 = "11_adultALsut", ident.2 = "11_agedALsut")
# c11sutDEGs_adultALvsagedAL[nad_genelist,]
# 
# Idents(sut.combined) <- sut.combined$cluster_sample
# c9sutDEGs_youngALvsagedAL <- FindMarkers(sut.combined, ident.1 = "9_youngALsut", ident.2 = "9_agedALsut")
# write.xlsx(c9sutDEGs_agedIFvsagedAL, "c9sutDEGs_agedIFvsagedAL.xlsx", colNames=T,rowNames=T,showNA = F, Append = TRUE)

# options(scipen = 999)
library(openxlsx)
# write.xlsx(sutDEGs_youngALvsagedAL, "sutDEGs_youngALvsagedAL.xlsx", colNames=T,rowNames=T,showNA = F, Append = TRUE)
# write.xlsx(sutDEGs_youngALvsadultAL, "sutDEGs_youngALvsadultAL.xlsx", colNames=T,rowNames=T,showNA = F, Append = TRUE)
# write.xlsx(sutDEGs_adultALvsagedAL, "sutDEGs_adultALvsagedAL.xlsx", colNames=T,rowNames=T,showNA = F, Append = TRUE)
# write.xlsx(sutDEGs_agedIFvsagedAL, "sutDEGs_agedIFvsagedAL.xlsx", colNames=T,rowNames=T,showNA = F, Append = TRUE)
# write.xlsx(sutDEGs_adultIFvsadultAL, "sutDEGs_adultIFvsadultAL.xlsx", colNames=T,rowNames=T,showNA = F, Append = TRUE)
# write.xlsx(sutDEGs_youngALvsagedIF, "sutDEGs_youngALvsagedIF.xlsx", colNames=T,rowNames=T,showNA = F, Append = TRUE)

# The only visible definition of c11sutDEGs_agedIFvsagedAL is in commented-out
# code, so export it only when it exists instead of stopping with
# "object not found".
if (exists("c11sutDEGs_agedIFvsagedAL")) {
  write.xlsx(c11sutDEGs_agedIFvsagedAL, "c11sutDEGs_agedIFvsagedAL.xlsx", colNames=TRUE,rowNames=TRUE,showNA = FALSE, Append = TRUE)
} else {
  warning("c11sutDEGs_agedIFvsagedAL is not defined; skipping export", call. = FALSE)
}

# Check if known genes of interest are considered DEGs
# sutDEGs_agedIFvsagedAL[nad_genelist,]
# sutDEGs_agedIFvsagedAL[autoph_genelist,]
# sutDEGs_agedIFvsagedAL[wnt_genelist,]
# 
# sutDEGs_youngALvsagedAL[wnt_genelist,]
# 
# sutDEGs_adultALvsagedAL[nad_genelist,]


# Comparing DEGs vs YoungAL in AgedAL and AgedIF
# Filter the DEG table to only include significant genes (padj<0.05)
# sig_sutDEGs_youngALvsagedAL <- sutDEGs_youngALvsagedAL[sutDEGs_youngALvsagedAL$p_val_adj<0.05,]
# sig_sutDEGs_youngALvsagedIF <- sutDEGs_youngALvsagedIF[sutDEGs_youngALvsagedIF$p_val_adj<0.05,]
# 
# UP_sig_sutDEGs_youngALvsagedAL <- sig_sutDEGs_youngALvsagedAL[sig_sutDEGs_youngALvsagedAL$avg_log2FC>0,]
# DOWN_sig_sutDEGs_youngALvsagedAL <- sig_sutDEGs_youngALvsagedAL[sig_sutDEGs_youngALvsagedAL$avg_log2FC<0,]
# 
# UP_sig_sutDEGs_youngALvsagedIF <- sig_sutDEGs_youngALvsagedIF[sig_sutDEGs_youngALvsagedIF$avg_log2FC>0,]
# DOWN_sig_sutDEGs_youngALvsagedIF <- sig_sutDEGs_youngALvsagedIF[sig_sutDEGs_youngALvsagedIF$avg_log2FC<0,]
# 
# # Which upregulated genes are DEG only in AL?
# g1up <- setdiff(rownames(UP_sig_sutDEGs_youngALvsagedAL),rownames(UP_sig_sutDEGs_youngALvsagedIF))
# length(g1up)
# write.xlsx(g1up,"g1up.xlsx")
# 
# # Which upregulated genes are DEG only in IF?
# g2up <- setdiff(rownames(UP_sig_sutDEGs_youngALvsagedIF),rownames(UP_sig_sutDEGs_youngALvsagedAL))
# length(g2up)
# 
# # Which upregulated genes are DEG in both?
# g3up <- intersect(rownames(UP_sig_sutDEGs_youngALvsagedAL),rownames(UP_sig_sutDEGs_youngALvsagedIF))
# length(g3up)
# 
# # Which downregulated genes are DEG only in AL?
# g1down <- setdiff(rownames(DOWN_sig_sutDEGs_youngALvsagedAL),rownames(DOWN_sig_sutDEGs_youngALvsagedIF))
# length(g1down)
# 
# write.xlsx(UP_sig_sutDEGs_youngALvsagedAL[g1up,],"UP_sig_sutDEGs_uniqueto_youngALvsagedAL.xlsx",colNames=T,rowNames=T,showNA = F, Append = TRUE)
# write.xlsx(DOWN_sig_sutDEGs_youngALvsagedIF[g2down,],"DOWN_sig_sutDEGs_uniqueto_youngALvsagedIF.xlsx",colNames=T,rowNames=T,showNA = F, Append = TRUE)
# 
# # Which downregulated genes are DEG only in IF?
# g2down <- setdiff(rownames(DOWN_sig_sutDEGs_youngALvsagedIF),rownames(DOWN_sig_sutDEGs_youngALvsagedAL))
# length(g2down)
# 
# # Which downregulated genes are DEG in both?
# g3down <- intersect(rownames(DOWN_sig_sutDEGs_youngALvsagedAL),rownames(DOWN_sig_sutDEGs_youngALvsagedIF))
# length(g3down)
# 
# # Above in progress, below is just notes
# # 
# # 
# # 
# # 
# 
# 
# sc_dataB <- Read10X(data.dir = "/Users/k1773283/OneDrive - King's College London/RNAseq analysis - Manuscript/JR scRNAseq/L236B")
# scB <- CreateSeuratObject(counts = sc_dataB$`Gene Expression`, project = "L236B", min.cells = 1, min.features = 100)
# scB
# 
# sc_dataC <- Read10X(data.dir = "/Users/k1773283/OneDrive - King's College London/RNAseq analysis - Manuscript/JR scRNAseq/L236C")
# scC <- CreateSeuratObject(counts = sc_dataC$`Gene Expression`, project = "L236C", min.cells = 1, min.features = 100)
# scC
# 
# sc_dataD <- Read10X(data.dir = "/Users/k1773283/OneDrive - King's College London/RNAseq analysis - Manuscript/JR scRNAseq/L236D")
# scD <- CreateSeuratObject(counts = sc_dataD$`Gene Expression`, project = "L236D", min.cells = 1, min.features = 100)
# scD
# 
# rm(sc_dataA)
# rm(sc_dataB)
# rm(sc_dataC)
# rm(sc_dataD)
# 
# # # create a named list of seurat objects to be merged
# seuratstobemergedlist <- c("scA","scB","scC","scD")
# # 
# # # optional but probably a good idea
# # # rename cells using object names as prefix
# for (i in names(seuratstobemergedlist)) {
#    seuratstobemergedlist[[i]] <- RenameCells(seuratstobemergedlist[[i]],
#                                           add.cell.id = i)
#  }
# 
# sc <- merge(scA, y = c(scB,scC,scD), add.cell.ids = c("L236A", "L236B","L236C","L236D"), project = "10X_SCRNA")
# sc
# 
# # store mitochondrial percentage in object meta data
# sc <- PercentageFeatureSet(sc, pattern = "^mt-", col.name = "percent.mt")
# sc@meta.data$percent.mt
# 
# # scA <- PercentageFeatureSet(scA, pattern = "^mt-", col.name = "percent.mt")
# # scA@meta.data$percent.mt
# 
# # Clear space for upcoming processing
# rm(scA)
# rm(scB)
# rm(scC)
# rm(scD)
# 
# sc <- SCTransform(sc, vars.to.regress = "percent.mt", verbose = FALSE)
# 
# sc
# # scA <- FindVariableFeatures(scA)
# # sc <- ScaleData(sc)
# 
# library(Seurat)
# sc <- readRDS(file = "scA_h1.rds")
# 
# These are now standard steps in the Seurat workflow for visualization and clustering

# Run the standard Seurat visualisation/clustering steps for the PERI
# immune-depleted object (`peri.combined.osteo`).

# Principal components used downstream. Defined once so that the UMAP embedding
# and the neighbour graph are always built from the same set of PCs.
peri.osteo.dims <- 1:16

peri.combined.osteo <- FindVariableFeatures(peri.combined.osteo)
peri.combined.osteo <- ScaleData(peri.combined.osteo)
peri.combined.osteo <- RunPCA(peri.combined.osteo, verbose = TRUE)

# Elbow plot over the first 20 PCs of the "pca" reduction; used to choose the
# number of PCs set in `peri.osteo.dims`.
ElbowPlot(peri.combined.osteo, ndims = 20, reduction = "pca")

peri.combined.osteo <- RunUMAP(
  peri.combined.osteo,
  dims = peri.osteo.dims,
  verbose = TRUE
)
peri.combined.osteo <- FindNeighbors(
  peri.combined.osteo,
  dims = peri.osteo.dims,
  verbose = TRUE
)
peri.combined.osteo <- FindClusters(peri.combined.osteo, verbose = TRUE)

# NOTE(review): this plots `peri.combined.osteo2`, not the `peri.combined.osteo`
# object processed above -- confirm this is intended and that the object and
# its "JRmanualcelltype" metadata column exist at this point.
DimPlot(peri.combined.osteo2, group.by = "JRmanualcelltype", label = TRUE)

# Print the per-cell `orig.ident` values for inspection.
peri.combined.osteo$orig.ident

# Plot the `peri.combined` and `sut.combined` objects grouped by their "cd45"
# and "prrx1" metadata columns respectively.
DimPlot(peri.combined, group.by = "cd45", label = TRUE)
DimPlot(sut.combined, group.by = "prrx1", label = TRUE)

# Show the working directory: the .rds file below is written to a relative
# path, so this is where it will end up.
getwd()
saveRDS(peri.combined.osteo, "immune-depletedperiosteum.rds")



# Run the same Seurat visualisation/clustering steps for the SUTURE
# immune-depleted object (`sut.combined.osteo`).

# Principal components used downstream. Defined once so that the UMAP embedding
# and the neighbour graph are always built from the same set of PCs.
sut.osteo.dims <- 1:14

sut.combined.osteo <- FindVariableFeatures(sut.combined.osteo)
sut.combined.osteo <- ScaleData(sut.combined.osteo)
sut.combined.osteo <- RunPCA(sut.combined.osteo, verbose = TRUE)

# Elbow plot over the first 20 PCs of the "pca" reduction; used to choose the
# number of PCs set in `sut.osteo.dims`.
ElbowPlot(sut.combined.osteo, ndims = 20, reduction = "pca")

sut.combined.osteo <- RunUMAP(
  sut.combined.osteo,
  dims = sut.osteo.dims,
  verbose = TRUE
)
sut.combined.osteo <- FindNeighbors(
  sut.combined.osteo,
  dims = sut.osteo.dims,
  verbose = TRUE
)
sut.combined.osteo <- FindClusters(sut.combined.osteo, verbose = TRUE)

# Store the identities produced by the clustering above in a metadata column
# so they can be used as a grouping variable in plots.
sut.combined.osteo$cluster_new <- Idents(sut.combined.osteo)
DimPlot(sut.combined.osteo, group.by = "cluster_new", label = TRUE)

# NOTE(review): this inspects the `cluster` column, not the `cluster_new`
# column written just above -- confirm `cluster` exists and is the one meant.
sut.combined.osteo$cluster

# Show the working directory: the .rds file below is written to a relative
# path, so this is where it will end up.
getwd()
saveRDS(sut.combined.osteo, "immune-depletedsuture.rds")

# DimPlot(sc, label = TRUE) + NoLegend()
# 
# savedclusters <- sc@active.ident
# 
# class(sc@active.ident)
# class(as.factor(sc$orig.ident))
# sc@active.ident <- as.factor(sc$orig.ident)
# 
# sc
# 
# L236Acells <- WhichCells(sc, idents = "L236A")
# L236Bcells <- WhichCells(sc, idents = "L236B")
# L236Ccells <- WhichCells(sc, idents = "L236C")
# L236Dcells <- WhichCells(sc, idents = "L236D")
# # g1_treat <- WhichCells(sc, idents = c( "group1_treated"))
# 
# # sc$Replicate <- savedclusters
# 
# Idents(sc) <- sc$orig.ident
# Idents(sc) <- savedclusters
# DimPlot(sc, label = TRUE) + NoLegend()
# DimPlot(sc, label=T, group.by= sc@active.ident, cells.highlight= list(L236Acells), cols.highlight = c("darkblue", "darkred"), cols= "grey")
# 
# # These are now standard steps in the Seurat workflow for visualization and clustering.
# # Visualize canonical marker genes as violin plots.
# VlnPlot(sc, features = c("Lgr5"), 
#         pt.size = 0.2, ncol = 1)
# 
# # Visualize canonical marker genes on the sctransform embedding.
# 
# # Stem cell markers
# SSCsutmarkers <- c("Grem1","Axin2","Nes","Gli1","Thy1","Mcam")
# SSCperimarkers
# 
# 
# FeaturePlot(sc, features = SSCsutmarkers, pt.size = 0.2, 
#             ncol = 3)
# 
# FeaturePlot(sc, features = c("Lgr5","Axin2","Notch1","Ascl2","Cd44"), pt.size = 0.2, 
#             ncol = 1)
# 
# FeaturePlot(sc, features = c("Defa5","Wnt3","Mmp7"), pt.size = 0.2, 
#             ncol = 1)
# 
# FeaturePlot(sc, features = c("Top2a","Mki67"), pt.size = 0.2, 
#             ncol = 1)
# 
# FeaturePlot(sc, features = c("Krt7","Kcne3","Zg16","Tff3"), pt.size = 0.2, 
#             ncol = 1)
# 
# # Pseudobulk analysis by cluster
# cluster.averages <- AverageExpression(sc)
# 
# head(cluster.averages[["SCT"]][1:5,])
# head(cluster.averages[["SCT"]][5,])
# 
# Gene_list <- read.csv('~/Documents/RNASeq/SC/2021/genelistcrucial.csv')
# # Gene_list <- Genes.of.interest<-Gene_list %>% filter(Category == "Post-synaptic components")
# Gene_list <- Gene_list$Genes
# Gene_list <- as.data.frame(Gene_list)
# 
# pseudobulk.cluster.averages <- as.data.frame(cluster.averages[["SCT"]],)
# # rowsofinterest <- which(toupper(row.names(pseudobulk.cluster.averages)) %in% toupper(Gene_list))
# # pseudobulk.genesofinterest <- pseudobulk.cluster.averages[rowsofinterest,]
# 
# geneset1 <- c("Dlg3","Axin2","Lgr5")
# 
# wntliggenes <- Gene_list[1:20,]
# wntpathgenes <- Gene_list[21:162,]
# glurgenes <- Gene_list[163:188,]
# pregenes <- Gene_list[189:211,]
# postgenes <- Gene_list[212:223,]
# gluugenes <- Gene_list[224:231,]
# 
# 
# pseudobulk.genesofinterest[geneset1,]
# 
# pseudo.output <- pseudobulk.cluster.averages[Gene_list[,],]
# 
# pseudo.output <- filter(pseudo.output, rowSums(is.na(pseudo.output)) != ncol(pseudo.output))
# 
# pseudobulk.cluster.averages[wntliggenes,]
# pseudobulk.cluster.averages[wntpathgenes,]
# pseudobulk.cluster.averages[glurgenes,]
# pseudobulk.cluster.averages[pregenes,]
# pseudobulk.cluster.averages[postgenes,]
# pseudobulk.cluster.averages[gluugenes,]
# 
# write.csv(pseudo.output,"/Users/Josh/Documents/RNASeq/SC/2021/Pseudobulk/data/GSE130113/GSE130113.csv", row.names = TRUE)
# 
# genestoplot <- c("Wnt3","Homer1","Bsn")
# clusterstoshow <- c("7", "2", "1")
# pseudobarplot <- pseudobulk.cluster.averages[genestoplot,clusterstoshow]
# 
# p<-ggplot(data=pseudobarplot, aes(x = "4")) +
#   geom_bar(stat="identity")
# 
# barplot(t(as.matrix(pseudobarplot)),beside=TRUE)
# 
# sc[["RNA"]]@counts["Lgr5",]
# DefaultAssay(sc) <- "RNA"
# Lgr5cells <- subset(x = sc, subset = Lgr5 > 0)
# Lgr5cells <- SCTransform(Lgr5cells, vars.to.regress = "percent.mt", verbose = FALSE)
# Idents(Lgr5cells) <- Lgr5cells$orig.ident
# Lgr5avgs <- as.data.frame(AverageExpression(Lgr5cells, features = c(postgenes,pregenes,glurgenes), assays = "RNA", return.seurat = FALSE))
# 
# subset(x = sc, subset = Mmp7 > 0)
# Mmp7cells <- subset(x = sc, subset = Mmp7 > 0)
# Lgr5cells <- SCTransform(Lgr5cells, vars.to.regress = "percent.mt", verbose = FALSE)
# Idents(Lgr5cells) <- Lgr5cells$orig.ident
# 
# sc <- SCTransform(sc, vars.to.regress = "percent.mt", verbose = FALSE)
# Idents(sc) <- savedclusters
# scavgs <- as.data.frame(AverageExpression(sc, features = c(postgenes,pregenes,glurgenes), assays = "RNA", return.seurat = FALSE))
# 
# 
# mergeddata <- merge(scavgs,Lgr5avgs, by = 'row.names')
# 
# write.csv(mergeddata,"/Users/Josh/Documents/RNASeq/SC/2021/Pseudobulk/data/GSE130113/mergedLgr5_Clusters.csv", row.names = TRUE)
# 
# 
# # Recluster by subsetting to the clusters of interest
# # Idents(sc)
# # 
# # subsc <- subset(sc, idents = c("4", "8","11"))
# # subsc <- RunPCA(subsc, verbose = FALSE)
# # subsc <- RunUMAP(subsc, dims = 1:30, verbose = FALSE)
# # 
# # subsc <- FindNeighbors(subsc, dims = 1:30, verbose = FALSE)
# # subsc <- FindClusters(subsc, verbose = FALSE)
# # DimPlot(subsc, label = TRUE) + NoLegend()
# # 
# # FeaturePlot(subsc, features = c("Defa5","Wnt3","Lgr5","Ascl2","Top2a"), pt.size = 0.2, 
# #             ncol = 1)
